diff --git a/.github/actions/setup_rust/action.yml b/.github/actions/setup_rust/action.yml index bf3ef5c7fc..233ff6da98 100644 --- a/.github/actions/setup_rust/action.yml +++ b/.github/actions/setup_rust/action.yml @@ -15,3 +15,4 @@ runs: uses: dtolnay/rust-toolchain@master with: toolchain: ${{ steps.rust-version.outputs.version }} + components: cargo,clippy,rust-docs,rust-src,rust-std,rustc,rustfmt diff --git a/.github/workflows/_release_docker.yml b/.github/workflows/_release_docker.yml index eabea2205d..91d8f49492 100644 --- a/.github/workflows/_release_docker.yml +++ b/.github/workflows/_release_docker.yml @@ -38,9 +38,10 @@ jobs: - name: Print input run: echo '${{ toJSON(inputs) }}' - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: ref: ${{ inputs.base }} + token: '${{ secrets.PULL_TOKEN }}' - name: Read rust version run: echo "RUST_VERSION=$(cargo metadata --no-deps --format-version 1 | jq -r '.packages[0].rust_version')" >> $GITHUB_ENV @@ -71,17 +72,17 @@ jobs: tags: ${{ inputs.tag }} - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Build and push by digest id: build - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v7 with: context: . build-args: | @@ -114,20 +115,20 @@ jobs: - build steps: - name: Download digests - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: path: /tmp/digests pattern: digests-${{ inputs.python }}-* merge-multiple: true - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Docker meta id: meta diff --git a/.github/workflows/_release_github.yml b/.github/workflows/_release_github.yml index 4483aa5e0f..f45528318b 100644 --- a/.github/workflows/_release_github.yml +++ b/.github/workflows/_release_github.yml @@ -24,7 +24,10 @@ jobs: include: - os: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - run: | git config --global user.email "ben.steer@pometry.com" git config --global user.name "Pometry-Team" diff --git a/.github/workflows/_release_python.yml b/.github/workflows/_release_python.yml index 0604eb058a..d0ffb60e66 100644 --- a/.github/workflows/_release_python.yml +++ b/.github/workflows/_release_python.yml @@ -41,8 +41,10 @@ jobs: ~/.cargo/git/db/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo- - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' ref: ${{ inputs.base }} - uses: actions/setup-python@v4 with: @@ -134,9 +136,11 @@ jobs: uses: arduino/setup-protoc@v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 with: ref: ${{ inputs.base }} + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: actions/setup-python@v4 with: python-version: | @@ -179,9 +183,11 @@ jobs: ~/.cargo/git/db/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo- - - uses: actions/checkout@v3 + - uses: 
actions/checkout@v5 with: ref: ${{ inputs.base }} + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: actions/setup-python@v4 with: python-version: | @@ -207,7 +213,7 @@ jobs: name: Publish to PyPi runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v8 with: pattern: pyartifacts-* merge-multiple: true @@ -225,7 +231,7 @@ jobs: skip_existing: true verify_metadata: false - name: Deleting artifacts - uses: geekyeggo/delete-artifact@v2 + uses: geekyeggo/delete-artifact@v6 with: failOnError: false name: | diff --git a/.github/workflows/_release_rust.yml b/.github/workflows/_release_rust.yml index 7e0ded59b8..376bb5a8e8 100644 --- a/.github/workflows/_release_rust.yml +++ b/.github/workflows/_release_rust.yml @@ -35,8 +35,10 @@ jobs: uses: arduino/setup-protoc@v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' ref: ${{ inputs.base }} - name: Setup rust uses: dtolnay/rust-toolchain@master diff --git a/.github/workflows/bench-graphql.yml b/.github/workflows/bench-graphql.yml index 78d2eeb88f..675525d9e3 100644 --- a/.github/workflows/bench-graphql.yml +++ b/.github/workflows/bench-graphql.yml @@ -14,8 +14,11 @@ jobs: name: GraphQL Benchmark runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 name: Checkout + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - name: Install Protoc uses: arduino/setup-protoc@v3 with: @@ -31,7 +34,7 @@ jobs: make install-python clean # ↑ this is for freeing up space used by cargo before installing raphtory - name: Set up pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@v5 with: version: 9 - uses: grafana/setup-k6-action@v1 @@ -39,8 +42,10 @@ jobs: k6-version: "1.0.0" - name: Run GraphQL benchmarks run: cd graphql-bench && make bench-local - - name: Restore metadata file - run: git restore graphql-bench/data/apache/master/.raph # otherwise github-action-benchmark fails to create the commit + - name: Restore modified files + run: | + git restore Cargo.lock # modified by build; github-action-benchmark can't switch to gh-pages with dirty working tree + git restore graphql-bench/data/apache/master # otherwise github-action-benchmark fails to create the commit - name: Print bench results run: cat graphql-bench/output.json - name: Store benchmark results from master branch diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 01421a1dfa..fb8a561e63 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -26,8 +26,11 @@ jobs: include: - os: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 name: Checkout + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: ./.github/actions/setup_rust name: Setup Rust - name: Install Protoc @@ -38,7 +41,7 @@ jobs: uses: Swatinem/rust-cache@v2 with: cache-all-crates: true - - uses: webfactory/ssh-agent@v0.9.0 + - uses: webfactory/ssh-agent@v0.10.0 name: Load raphtory-disk_graph key with: ssh-private-key: ${{ secrets.RA_SSH_PRIVATE_KEY }} @@ -46,8 +49,8 @@ jobs: run: | set -o pipefail cargo bench --bench base --bench algobench -p raphtory-benchmark -- --output-format=bencher | tee benchmark-result.txt - - name: Delete cargo.lock if it exists - run: rm -f Cargo.lock + - name: Restore Cargo.lock to avoid dirty working tree + run: git checkout -- Cargo.lock - name: Store 
benchmark results from master branch if: github.ref == 'refs/heads/master' uses: benchmark-action/github-action-benchmark@v1 diff --git a/.github/workflows/code_coverage.yml b/.github/workflows/code_coverage.yml index 7db020c751..6eb89401c8 100644 --- a/.github/workflows/code_coverage.yml +++ b/.github/workflows/code_coverage.yml @@ -18,8 +18,12 @@ SCCACHE_DIR: /home/runner/.cache/sccache SCCACHE_PATH: /home/runner/.cache/sccache steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 name: Checkout + with: + ref: ${{ github.head_ref }} + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - name: Cache sccache uses: actions/cache@v3 with: diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 8e711ca6aa..dc27e2bc49 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -8,10 +8,13 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 name: Checkout with: ref: ${{ github.head_ref }} + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' + persist-credentials: 'false' - uses: ./.github/actions/setup_rust name: Setup Rust - name: Setup nightly rust @@ -34,7 +37,7 @@ jobs: run: | python -m pip install black mypy pandas-stubs - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: '20' - name: Install raphtory @@ -76,10 +79,9 @@ jobs: echo "Changes detected. Committing and pushing..." git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git checkout ${{ github.head_ref }} git add . git commit -m "chore: apply tidy-public auto-fixes" - git push --force-with-lease origin HEAD:${{ github.head_ref }} + git push https://oauth2:$GITHUB_TOKEN@github.com/Pometry/Raphtory.git HEAD:${{ github.head_ref }} else echo "No changes to commit." fi \ No newline at end of file diff --git a/.github/workflows/manual_release_docker_cloud.yml b/.github/workflows/manual_release_docker_cloud.yml index fd9fe972fc..3cdfd2217f 100644 --- a/.github/workflows/manual_release_docker_cloud.yml +++ b/.github/workflows/manual_release_docker_cloud.yml @@ -19,8 +19,10 @@ jobs: docker: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' ref: ${{ inputs.base }} - name: Read rust version @@ -35,20 +37,20 @@ jobs: fi - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 with: driver: cloud endpoint: "pometry/raphtory" install: true - name: Build and push - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v7 with: context: . 
build-args: RUST_VERSION=${{ env.RUST_VERSION }} diff --git a/.github/workflows/manual_retag_docker.yml b/.github/workflows/manual_retag_docker.yml index f4ea0fb997..ff34919727 100644 --- a/.github/workflows/manual_retag_docker.yml +++ b/.github/workflows/manual_retag_docker.yml @@ -21,13 +21,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Retag multi-platform image run: | diff --git a/.github/workflows/nightly_release.yml b/.github/workflows/nightly_release.yml index ecccb14694..26772113f7 100644 --- a/.github/workflows/nightly_release.yml +++ b/.github/workflows/nightly_release.yml @@ -15,7 +15,10 @@ jobs: outputs: should_run: ${{ steps.should_run.outputs.should_run }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - id: should_run continue-on-error: true name: Check latest commit is less than a day diff --git a/.github/workflows/release_auto.yml b/.github/workflows/release_auto.yml index 9b4fcb52cd..e1d36ec27e 100644 --- a/.github/workflows/release_auto.yml +++ b/.github/workflows/release_auto.yml @@ -26,9 +26,10 @@ jobs: outputs: version: ${{ steps.version.outputs.version }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: ref: ${{ inputs.base }} + token: '${{ secrets.PULL_TOKEN }}' - id: version run: echo "version=$(make print-version)" >> $GITHUB_OUTPUT call-release-rust-workflow: diff --git a/.github/workflows/release_bump_versions.yml b/.github/workflows/release_bump_versions.yml index 7f6d5c1056..0de369dda9 100644 --- a/.github/workflows/release_bump_versions.yml +++ b/.github/workflows/release_bump_versions.yml @@ -46,7 +46,10 @@ jobs: ~/.cargo/git/db/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo- - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - run: | git config --global user.email "ben.steer@pometry.com" git config --global user.name "Pometry-Team" @@ -70,7 +73,7 @@ jobs: echo CRATE_VERSION=$(cat Cargo.toml | grep version | head -n1 | cut -d '"' -f2) >> $GITHUB_ENV echo "Crate version is $CRATE_VERSION" - name: "Make a PR to bump version" - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v8 with: base: ${{ inputs.base }} author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> diff --git a/.github/workflows/rust_format_check.yml b/.github/workflows/rust_format_check.yml index 150fbd314b..9d60a342a1 100644 --- a/.github/workflows/rust_format_check.yml +++ b/.github/workflows/rust_format_check.yml @@ -15,7 +15,10 @@ jobs: rust-format-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - name: Setup nightly rust run: | rustup toolchain install nightly --allow-downgrade -c rustfmt diff --git a/.github/workflows/stress-test.yml b/.github/workflows/stress-test.yml index 9878f3fce6..6ffa7b25eb 100644 --- a/.github/workflows/stress-test.yml +++ b/.github/workflows/stress-test.yml @@ -11,8 +11,11 @@ jobs: name: GraphQL Stress Test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - 
uses: actions/checkout@v5 name: Checkout + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - name: Install Protoc uses: arduino/setup-protoc@v3 with: @@ -21,23 +24,22 @@ jobs: uses: Swatinem/rust-cache@v2 with: cache-all-crates: true - - name: Install maturin - run: pip install maturin==1.8.3 - - name: Build raphtory - run: | - make install-python clean - # ↑ this is for freeing up space used by cargo before installing raphtory + - uses: ./.github/actions/setup_rust + name: Setup Rust - name: Set up pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@v5 with: version: 9 - uses: grafana/setup-k6-action@v1 with: k6-version: "1.0.0" - name: Run stress test + env: + RUST_BACKTRACE: 1 run: | + cargo build --package raphtory-server --bin raphtory-server --profile=build-fast + ./target/build-fast/raphtory-server server --work-dir graphs & cd graphql-bench - raphtory server & make stress-test - name: Upload k6 report if: always() diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9b25a61435..1d3f3741be 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,22 +17,12 @@ jobs: name: Run Rust tests uses: ./.github/workflows/test_rust_workflow.yml secrets: inherit - call-test-rust-storage-workflow-in-local-repo: - name: Run Rust storage tests - uses: ./.github/workflows/test_rust_disk_storage_workflow.yml - secrets: inherit call-test-python-workflow-in-local-repo: name: Run Python tests uses: ./.github/workflows/test_python_workflow.yml with: test_python_lower: true secrets: inherit - call-test-python-disk-storage-workflow-in-local-repo: - name: Run Python storage tests - uses: ./.github/workflows/test_python_disk_storage_workflow.yml - with: - test_python_lower: true - secrets: inherit call-test-ui: name: Run UI Tests uses: ./.github/workflows/test_ui.yml diff --git a/.github/workflows/test_during_pr.yml b/.github/workflows/test_during_pr.yml index eaaa3874ab..8f5d4dc419 100644 --- a/.github/workflows/test_during_pr.yml +++ b/.github/workflows/test_during_pr.yml @@ -3,7 +3,8 @@ on: pull_request: branches: - master - - "0.16" + - db_v4 + - v16 concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -14,22 +15,12 @@ jobs: name: Run Rust tests uses: ./.github/workflows/test_rust_workflow.yml secrets: inherit - call-test-rust-storage-workflow-in-local-repo: - name: Run Rust storage tests - uses: ./.github/workflows/test_rust_disk_storage_workflow.yml - secrets: inherit call-test-python-workflow-in-local-repo: name: Run Python tests uses: ./.github/workflows/test_python_workflow.yml with: test_python_lower: false secrets: inherit - call-test-python-disk-storage-workflow-in-local-repo: - name: Run Python storage tests - uses: ./.github/workflows/test_python_disk_storage_workflow.yml - with: - test_python_lower: false - secrets: inherit call-test-ui-in-local-repo: name: Run UI Tests uses: ./.github/workflows/test_ui.yml @@ -54,10 +45,8 @@ jobs: secrets: inherit needs: [ call-test-rust-workflow-in-local-repo, - call-test-rust-storage-workflow-in-local-repo, call-test-python-workflow-in-local-repo, - call-test-python-disk-storage-workflow-in-local-repo, - call-test-ui-in-local-repo, + # call-test-ui-in-local-repo, call-benchmark-workflow-in-local-repo, call-graphql-bench-workflow-in-local-repo, call-stress-test-workflow-in-local-repo diff --git a/.github/workflows/test_python_disk_storage_workflow.yml b/.github/workflows/test_python_disk_storage_workflow.yml deleted file mode 100644 index 
8983052f86..0000000000 --- a/.github/workflows/test_python_disk_storage_workflow.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Run Python storage test -permissions: { } -on: - workflow_call: - inputs: - skip_tests: - type: boolean - default: false - required: false - test_python_lower: - type: boolean - default: false - required: false -# DO NOT CHANGE NAME OF WORKFLOW, USED IN OTHER WORKFLOWS KEEP "Rust Tests" -jobs: - select-strategy: - runs-on: ubuntu-latest - outputs: - python-versions: ${{ steps.set-matrix.outputs.python-versions }} - steps: - - id: set-matrix - run: | - echo "python-versions=[\"3.11\",\"3.14.0\"]" >> $GITHUB_OUTPUT - python-test: - if: ${{ !inputs.skip_tests }} - name: Python Tests - needs: select-strategy - strategy: - matrix: - python: ${{ fromJson(needs.select-strategy.outputs.python-versions) }} - os: [ macos-latest, ubuntu-latest, windows-latest ] - runs-on: '${{ matrix.os }}' - steps: - - uses: actions/checkout@v3 - name: Checkout - - uses: maxim-lobanov/setup-xcode@v1 - name: Xcode version - if: "contains(matrix.os, 'macOS')" - with: - xcode-version: latest-stable - - uses: ./.github/actions/setup_rust - name: Setup Rust - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: webfactory/ssh-agent@v0.7.0 - name: Load raphtory-disk_graph key - with: - ssh-private-key: ${{ secrets.RA_SSH_PRIVATE_KEY }} - - uses: Swatinem/rust-cache@v2 - name: Cargo cache - with: - cache-all-crates: true - - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - cache: 'pip' - - name: Activate pometry-storage in Cargo.toml - run: make pull-storage - - name: Install Python dependencies - run: | - python -m pip install tox - - name: Run Python tests - run: | - cd python && tox run -e storage diff --git a/.github/workflows/test_python_workflow.yml b/.github/workflows/test_python_workflow.yml index 102547627d..df333f4043 100644 --- a/.github/workflows/test_python_workflow.yml +++ b/.github/workflows/test_python_workflow.yml @@ -32,9 +32,11 @@ jobs: os: [ macos-latest, ubuntu-latest, windows-latest ] runs-on: '${{ matrix.os }}' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 name: Checkout with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' ref: ${{ github.head_ref }} - uses: maxim-lobanov/setup-xcode@v1 name: Xcode version diff --git a/.github/workflows/test_rust_disk_storage_workflow.yml b/.github/workflows/test_rust_disk_storage_workflow.yml deleted file mode 100644 index d8d0bafad9..0000000000 --- a/.github/workflows/test_rust_disk_storage_workflow.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: Run Rust test -permissions: { } -on: - workflow_call: - inputs: - skip_tests: - type: boolean - default: false - required: false -# DO NOT CHANGE NAME OF WORKFLOW, USED IN OTHER WORKFLOWS KEEP "Rust Tests" -jobs: - rust-test: - if: ${{ !inputs.skip_tests }} - name: Rust Tests - runs-on: '${{ matrix.os }}' - env: - RUST_BACKTRACE: 1 - strategy: - matrix: - include: - - { os: macos-latest, flags: "" } - - { os: ubuntu-latest, flags: "-C link-arg=-fuse-ld=lld" } - - { os: windows-latest, flags: "" } - steps: - - uses: maxim-lobanov/setup-xcode@v1 - name: Xcode version - if: "contains(matrix.os, 'macOS')" - with: - xcode-version: latest-stable - - uses: actions/checkout@v3 - name: Checkout - - uses: ./.github/actions/setup_rust - name: Setup Rust - - name: Free up space (ubuntu) - if: "contains(matrix.os, 'ubuntu')" - run: | - sudo rm -rf 
/usr/share/dotnet - sudo rm -rf /usr/local/lib/android - sudo rm -rf /opt/ghc - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - name: Install LLD - if: "contains(matrix.os, 'ubuntu')" - run: | - sudo apt-get install lld - - uses: webfactory/ssh-agent@v0.7.0 - name: Load pometry-storage key - with: - ssh-private-key: ${{ secrets.RA_SSH_PRIVATE_KEY }} - - name: Rust version - run: rustc --version --verbose - - uses: Swatinem/rust-cache@v2 - name: Cargo cache - with: - cache-all-crates: true - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install nextest - uses: taiki-e/install-action@v2 - with: - tool: nextest@0.9.99 - - name: Install cargo-hack - uses: taiki-e/install-action@cargo-hack - - name: Activate pometry-storage in Cargo.toml - run: make pull-storage - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Run all Tests (disk_graph) - env: - RUSTFLAGS: -Awarnings ${{ matrix.flags }} - TEMPDIR: ${{ runner.temp }} - run: | - cargo nextest run --all --no-default-features --features "storage" --cargo-profile build-fast - - name: Check all features - env: - RUSTFLAGS: -Awarnings - run: | - cargo hack check --workspace --all-targets --each-feature --skip extension-module,default - - diff --git a/.github/workflows/test_rust_workflow.yml b/.github/workflows/test_rust_workflow.yml index 87a4b9cefd..0caa121552 100644 --- a/.github/workflows/test_rust_workflow.yml +++ b/.github/workflows/test_rust_workflow.yml @@ -1,5 +1,5 @@ name: Run Rust test -permissions: {} +permissions: { } on: workflow_call: inputs: @@ -23,13 +23,10 @@ jobs: - os: ubuntu-latest - os: windows-latest steps: - - uses: maxim-lobanov/setup-xcode@v1 - name: Xcode version - if: "contains(matrix.os, 'macOS')" + - uses: actions/checkout@v5 with: - xcode-version: latest-stable - - uses: actions/checkout@v3 - name: Checkout + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: ./.github/actions/setup_rust name: Setup Rust - name: Free up space (ubuntu) @@ -50,9 +47,9 @@ jobs: with: cache-all-crates: true - name: Install nextest - uses: taiki-e/install-action@v2 - with: - tool: nextest@0.9.99 + uses: taiki-e/install-action@nextest + - name: Install cargo-hack + uses: taiki-e/install-action@cargo-hack - uses: actions/setup-python@v5 with: python-version: "3.12" @@ -61,7 +58,12 @@ jobs: RUSTFLAGS: -Awarnings TEMPDIR: ${{ runner.temp }} run: | - cargo nextest run --all --no-default-features --cargo-profile build-fast + cargo nextest run --workspace --no-default-features --cargo-profile build-fast + - name: Check all features + env: + RUSTFLAGS: -Awarnings + run: | + cargo hack check --workspace --all-targets --each-feature --skip extension-module,default doc-test: if: ${{ !inputs.skip_tests }} name: "Doc tests" @@ -72,7 +74,10 @@ jobs: - os: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: ./.github/actions/setup_rust name: Setup Rust - name: Install Protoc diff --git a/.github/workflows/test_ui.yml b/.github/workflows/test_ui.yml index ebe27b7e0b..54c7247bc0 100644 --- a/.github/workflows/test_ui.yml +++ b/.github/workflows/test_ui.yml @@ -18,23 +18,25 @@ jobs: strategy: matrix: python: ${{ fromJson(needs.select-strategy.outputs.python-versions) }} - os: [ ubuntu-latest ] - runs-on: '${{ matrix.os }}' + os: [ubuntu-latest] + runs-on: "${{ matrix.os }}" steps: - uses: actions/checkout@v5 name: Checkout with: ref: 
${{ github.head_ref }} + submodules: recursive + token: "${{ secrets.PULL_TOKEN }}" - uses: ./.github/actions/setup_rust name: Setup Rust - - name: Setup SSH for UI tests submodule + - name: Setup SSH for UI tests run: | - mkdir -p ~/.ssh - echo "${{ secrets.UI_TESTS_SSH_KEY }}" > ~/.ssh/id_ed25519 - chmod 600 ~/.ssh/id_ed25519 + mkdir -p ~/.ssh + echo "${{ secrets.UI_TESTS_SSH_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 ssh-keyscan github.com >> ~/.ssh/known_hosts - - name: Init and update UI tests submodule - run: make pull-ui-tests + - name: Update UI tests + run: make update-ui-tests - name: Install Protoc uses: arduino/setup-protoc@v3 with: @@ -49,13 +51,13 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - cache: 'pip' + cache: "pip" - name: Create python venv for Maturin run: python -m venv .venv - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: - node-version: '20' + node-version: "20" - name: Build raphtory-python uses: PyO3/maturin-action@v1 with: @@ -80,4 +82,3 @@ jobs: name: playwright-report-with-py-${{ matrix.python }} path: ui-tests/playwright-report retention-days: 7 - diff --git a/.gitignore b/.gitignore index 0a0a442ff0..afd90f8332 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ massif.* .zed/ .fleet/ **/proptest-regressions/ +**/*.proptest-regressions # these are generated by flamegraph *.svg # this is for raphtory diff --git a/.gitmodules b/.gitmodules index 83994a118e..0b8628a385 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ -[submodule "pometry-storage-private"] - path = pometry-storage-private - url = git@github.com:Pometry/pometry-storage.git -[submodule "ui-tests"] - path = ui-tests - url = git@github.com:Pometry/ui-tests.git +[submodule "optd"] + path = optd + url = git@github.com:Pometry/optd.git +[submodule "clam-core"] + path = clam-core + url = git@github.com:Pometry/optd-graph.git diff --git a/Cargo.lock b/Cargo.lock index 374836bda5..73b3a06f36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "Inflector" @@ -26,7 +26,7 @@ checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ "cfg-if", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -97,9 +97,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -112,15 +112,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -147,18 +147,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" - -[[package]] -name = "ar_archive_writer" -version = "0.2.0" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" -dependencies = [ - "object", -] +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "arbitrary" @@ -171,9 +162,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" dependencies = [ "rustversion", ] @@ -198,9 +189,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -219,23 +210,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -245,29 +236,33 @@ 
dependencies = [ "chrono-tz 0.10.4", "half", "hashbrown 0.16.1", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -276,15 +271,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -297,21 +292,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,15 +315,15 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex", + "lz4_flex 0.12.1", "zstd", ] [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,20 +332,22 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.14.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -360,9 +358,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ 
"arrow-array", "arrow-buffer", @@ -373,34 +371,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ "bitflags", - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -408,7 +406,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -433,20 +431,14 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.19" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" dependencies = [ - "brotli 7.0.0", - "bzip2 0.5.2", - "flate2", - "futures-core", - "memchr", + "compression-codecs", + "compression-core", "pin-project-lite", "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -468,8 +460,8 @@ dependencies = [ "fnv", "futures-util", "handlebars", - "http 1.4.0", - "indexmap 2.13.0", + "http", + "indexmap 2.14.0", "mime", "multer", "num-traits", @@ -480,7 +472,7 @@ dependencies = [ "serde_urlencoded", "static_assertions_next", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -496,8 +488,8 @@ dependencies = [ "proc-macro2", "quote", "strum 0.27.2", - "syn 2.0.114", - "thiserror 2.0.17", + "syn 2.0.117", + "thiserror 2.0.18", ] [[package]] @@ -514,13 +506,13 @@ dependencies = [ [[package]] name = "async-graphql-poem" -version = "7.1.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ff5480bfb998f5405f4ad3d861b69a0467e8667f9f4174e86e85bf896c1f8c9" +checksum = "5484e75de31fd28a2827010e5170b61a37bdb472467b3ba9efc5535c192da664" dependencies = [ "async-graphql", "futures-util", - "http 1.4.0", + "http", "mime", "poem", "serde_json", @@ -536,7 +528,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e3ef112905abea9dea592fc868a6873b10ebd3f983e83308f995d6284e9ba41" dependencies = [ "bytes", - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", "serde_json", ] @@ -554,7 +546,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix 1.1.3", + "rustix 1.1.4", "slab", "windows-sys 0.61.2", ] @@ -589,7 +581,7 @@ dependencies = [ "secrecy", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-util", @@ -604,7 +596,7 @@ checksum = "81872a8e595e8ceceab71c6ba1f9078e313b452a1e31934e6763ef5d308705e4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] 
[[package]] @@ -615,7 +607,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -637,7 +629,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -648,7 +640,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -691,53 +683,11 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "aws-config" -version = "1.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "hex", - "http 1.4.0", - "ring", - "time", - "tokio", - "tracing", - "url", - "zeroize", -] - -[[package]] -name = "aws-credential-types" -version = "1.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "zeroize", -] - [[package]] name = "aws-lc-rs" -version = "1.15.3" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e84ce723ab67259cfeb9877c6a639ee9eb7a27b28123abd71db7f0d5d0cc9d86" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ "aws-lc-sys", "untrusted 0.7.1", @@ -746,9 +696,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.36.0" +version = "0.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a442ece363113bd4bd4c8b18977a7798dd4d3c3383f34fb61936960e8f4ad8" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" dependencies = [ "cc", "cmake", @@ -756,325 +706,6 @@ dependencies = [ "fs_extra", ] -[[package]] -name = "aws-runtime" -version = "1.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "959dab27ce613e6c9658eb3621064d0e2027e5f2acb65bc526a43577facea557" -dependencies = [ - "aws-credential-types", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "tracing", - "uuid", -] - -[[package]] -name = "aws-sdk-dynamodb" -version = "1.103.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6df2a8b03419775bfaf4f3ebbb65a9772e9e69eed4467a1b33f22226722340fb" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-sso" -version = "1.92.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7d63bd2bdeeb49aa3f9b00c15e18583503b778b2e792fc06284d54e7d5b6566" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-ssooidc" -version = "1.94.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532d93574bf731f311bafb761366f9ece345a0416dbcc273d81d6d1a1205239b" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-sts" -version = "1.96.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357e9a029c7524db6a0099cd77fbd5da165540339e7296cca603531bc783b56c" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "1.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" -dependencies = [ - "aws-credential-types", - "aws-smithy-http", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "form_urlencoded", - "hex", - "hmac", - "http 0.2.12", - "http 1.4.0", - "percent-encoding", - "sha2", - "time", - "tracing", -] - -[[package]] -name = "aws-smithy-async" -version = "1.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "aws-smithy-http" -version = "0.62.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "futures-util", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - -[[package]] -name = "aws-smithy-http-client" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "h2 0.3.27", - "h2 0.4.13", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper 1.8.1", - "hyper-rustls 0.24.2", - "hyper-rustls 0.27.7", - "hyper-util", - "pin-project-lite", - "rustls 0.21.12", - "rustls 0.23.36", - "rustls-native-certs 0.8.3", - "rustls-pki-types", - "tokio", - "tokio-rustls 0.26.4", - "tower 0.5.3", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.61.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-observability" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1fcbefc7ece1d70dcce29e490f269695dfca2d2bacdeaf9e5c3f799e4e6a42" -dependencies = [ - "aws-smithy-runtime-api", -] - -[[package]] -name = "aws-smithy-query" -version = "0.60.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-runtime" -version = "1.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb5b6167fcdf47399024e81ac08e795180c576a20e4d4ce67949f9a88ae37dc1" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-client", - "aws-smithy-observability", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "pin-project-lite", - "pin-utils", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-runtime-api" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efce7aaaf59ad53c5412f14fc19b2d5c6ab2c3ec688d272fd31f76ec12f44fb0" -dependencies = [ - "aws-smithy-async", - "aws-smithy-types", - "bytes", - "http 0.2.12", - "http 1.4.0", - "pin-project-lite", - "tokio", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-smithy-types" -version = "1.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" -dependencies = [ - "base64-simd", - "bytes", - "bytes-utils", - "futures-core", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "itoa", - "num-integer", - "pin-project-lite", - "pin-utils", - "ryu", - "serde", - "time", - "tokio", - "tokio-util", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.60.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" -dependencies = [ - "xmlparser", -] - -[[package]] -name = "aws-types" -version = "1.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" -dependencies = [ - "aws-credential-types", - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "rustc_version", - "tracing", -] - [[package]] name = "axum" version = "0.7.9" @@ -1085,8 +716,8 @@ dependencies = [ "axum-core 0.4.5", "bytes", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "itoa", "matchit 0.7.3", @@ -1104,18 +735,18 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core 0.5.6", "bytes", "form_urlencoded", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "itoa", "matchit 0.8.4", @@ -1144,8 +775,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.4.0", - "http-body 
1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", @@ -1163,8 +794,8 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", @@ -1188,17 +819,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "backon" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" -dependencies = [ - "fastrand", - "gloo-timers", - "tokio", -] - [[package]] name = "base64" version = "0.21.7" @@ -1220,22 +840,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "base64-simd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - -[[package]] -name = "base64ct" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" - [[package]] name = "bigdecimal" version = "0.4.10" @@ -1259,6 +863,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + [[package]] name = "bit-set" version = "0.8.0" @@ -1276,9 +900,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" dependencies = [ "serde_core", ] @@ -1315,16 +939,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq 0.4.2", - "cpufeatures", + "cpufeatures 0.3.0", ] [[package]] @@ -1336,20 +960,11 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "bon" -version = "3.8.2" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1357,9 +972,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.2" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling 0.23.0", "ident_case", @@ -1367,19 +982,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] -name = "brotli" -version = "7.0.0" +name = "boxcar" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 4.0.3", -] +checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" [[package]] name = "brotli" @@ -1389,17 +999,7 @@ checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", - "brotli-decompressor 5.0.0", -] - -[[package]] -name = "brotli-decompressor" -version = "4.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "brotli-decompressor", ] [[package]] @@ -1414,15 +1014,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -1435,7 +1035,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1453,16 +1053,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bytes-utils" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" -dependencies = [ - "bytes", - "either", -] - [[package]] name = "bzip2" version = "0.4.4" @@ -1482,15 +1072,6 @@ dependencies = [ "bzip2-sys", ] -[[package]] -name = "bzip2" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" -dependencies = [ - "libbz2-rs-sys", -] - [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -1507,20 +1088,11 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cc" -version = "1.2.52" +version = "1.2.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" dependencies = [ "find-msvc-tools", "jobserver", @@ -1548,9 +1120,9 @@ checksum = 
"613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -1629,11 +1201,55 @@ dependencies = [ "inout", ] +[[package]] +name = "clam-core" +version = "0.18.0" +dependencies = [ + "ahash", + "arrow", + "async-trait", + "chrono", + "chrono-tz 0.10.4", + "comfy-table", + "criterion", + "db4-storage", + "env_logger 0.10.2", + "fastrand", + "flate2", + "insta", + "itertools 0.13.0", + "log", + "nom 7.1.3", + "optd-core", + "parking_lot", + "proptest", + "pyo3", + "raphtory", + "raphtory-api", + "raphtory-itertools", + "raphtory-storage", + "rayon", + "regex", + "roaring 0.10.12", + "rustyline", + "serde", + "serde_json", + "slotmap", + "snb", + "tempfile", + "thiserror 2.0.18", + "tikv-jemallocator", + "tokio", + "tracing", + "tracing-test", + "uuid", +] + [[package]] name = "clap" -version = "4.5.54" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", "clap_derive", @@ -1641,9 +1257,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -1653,48 +1269,77 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "clipboard-win" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", - "unicode-width", + "crossterm", + "unicode-segmentation", + "unicode-width 0.2.2", +] + +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "brotli", + "compression-core", + "flate2", + "memchr", + "zstd", + "zstd-safe", ] +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1716,13 +1361,24 @@ dependencies = [ "nom 7.1.3", "pathdiff", "ron", - "rust-ini 0.20.0", + "rust-ini", "serde", "serde_json", "toml", "yaml-rust2", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1805,6 +1461,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.4.0" @@ -1820,15 +1485,6 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -1911,12 +1567,45 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-skiplist" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags", + "crossterm_winapi", + "document-features", + "parking_lot", + "rustix 1.1.4", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.4" @@ -1925,9 +1614,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.7" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", "typenum", @@ -1964,16 +1653,6 @@ dependencies = [ "darling_macro 0.20.11", ] -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - [[package]] name = "darling" version = "0.23.0" @@ -1995,21 +1674,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", -] - -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2022,7 +1687,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2033,18 +1698,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", -] - -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2055,7 +1709,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2082,25 +1736,23 @@ checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "datafusion" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "7541353e77dc7262b71ca27be07d8393661737e3a73b5d1b1c6f7d814c64fa2a" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", - "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", "datafusion-expr-common", @@ -2117,29 +1769,25 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "datafusion-sql", - "flate2", "futures", "itertools 0.14.0", "log", "object_store", "parking_lot", - "parquet", - "rand 0.9.2", + "rand 0.9.4", "regex", "sqlparser", "tempfile", "tokio", "url", "uuid", - "xz2", - "zstd", ] [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "9997731f90fa5398ef831ad0e69600f92c861b79c0d38bd1a29b6f0e3a0ce4c8" dependencies = [ "arrow", "async-trait", @@ -2152,7 +1800,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -2163,9 +1810,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" 
-version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "2b30a3dd50dec860c9559275c8d97d9de602e611237a6ecfbda0b3b63b872352" dependencies = [ "arrow", "async-trait", @@ -2175,35 +1822,32 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "d551054acec0398ca604512310b77ce05c46f66e54b54d48200a686e385cca4e" dependencies = [ "ahash", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", - "hashbrown 0.14.5", - "indexmap 2.13.0", + "hashbrown 0.16.1", + "indexmap 2.14.0", "libc", "log", "object_store", - "parquet", "paste", - "recursive", "sqlparser", "tokio", "web-time", @@ -2211,9 +1855,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "567d40e285f5b79f8737b576605721cd6c1133b5d2b00bdbd5d9838d90d0812f" dependencies = [ "futures", "log", @@ -2222,15 +1866,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "27d2668f51b3b30befae2207472569e37807fdedd1d14da58acc6f8ca6257eae" dependencies = [ "arrow", - "async-compression", "async-trait", "bytes", - "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -2241,119 +1883,100 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", - "flate2", "futures", "glob", "itertools 0.14.0", "log", "object_store", - "parquet", - "rand 0.9.2", - "tempfile", + "rand 0.9.4", "tokio", - "tokio-util", "url", - "xz2", - "zstd", ] [[package]] -name = "datafusion-datasource-csv" -version = "50.3.0" +name = "datafusion-datasource-arrow" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "e02e1b3e3a8ec55f1f62de4252b0407c8567363d056078769a197e24fc834a0f" dependencies = [ "arrow", + "arrow-ipc", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", + "itertools 0.14.0", "object_store", - "regex", "tokio", ] [[package]] -name = "datafusion-datasource-json" -version = "50.3.0" +name = "datafusion-datasource-csv" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "b559d7bf87d4f900f847baba8509634f838d9718695389e903604cdcccdb01f3" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", 
"datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", + "regex", "tokio", ] [[package]] -name = "datafusion-datasource-parquet" -version = "50.3.0" +name = "datafusion-datasource-json" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "250e2d7591ba8b638f063854650faa40bca4e8bd4059b2ece8836f6388d02db4" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", - "datafusion-pruning", "datafusion-session", "futures", - "itertools 0.14.0", - "log", "object_store", - "parking_lot", - "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "b9496cb0db222dbb9a3735760ceca7fc56f35e1d5502c38d0caa77a81e9c1f6a" [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "dc45d23c516ed8d3637751e44e09e21b45b3f58b473c802dddd1f1ad4fe435ff" dependencies = [ "arrow", "async-trait", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -2361,16 +1984,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "63dd30526d2db4fda6440806a41e4676334a94bc0596cc9cc2a0efed20ef2c44" dependencies = [ "arrow", "async-trait", @@ -2381,31 +2004,31 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.13.0", + "indexmap 2.14.0", + "itertools 0.14.0", "paste", - "recursive", "serde_json", "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = "1b486b5f6255d40976b88bb83813b0d035a8333e0ec39864824e78068cf42fa6" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "07356c94118d881130dd0ffbff127540407d969c8978736e324edcd6c41cd48f" dependencies = [ "arrow", "arrow-buffer", @@ -2413,6 +2036,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz 0.10.4", "datafusion-common", 
"datafusion-doc", "datafusion-execution", @@ -2423,7 +2047,8 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", - "rand 0.9.2", + "num-traits", + "rand 0.9.4", "regex", "sha2", "unicode-segmentation", @@ -2432,9 +2057,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "b644f9cf696df9233ce6958b9807666d78563b56f923267474dd6c07795f1f8f" dependencies = [ "ahash", "arrow", @@ -2453,9 +2078,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "c1de2deaaabe8923ce9ea9f29c47bbb4ee14f67ea2fe1ab5398d9bbebcf86e56" dependencies = [ "ahash", "arrow", @@ -2466,9 +2091,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "552f8d92e4331ee91d23c02d12bb6acf32cbfd5215117e01c0fb63cd4b15af1a" dependencies = [ "arrow", "arrow-ord", @@ -2476,6 +2101,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -2488,9 +2114,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "970fd0cdd3df8802b9a9975ff600998289ba9d46682a4f7285cba4820c9ada78" dependencies = [ "arrow", "async-trait", @@ -2504,9 +2130,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "40b4c21a7c8a986a1866c0a87ab756d0bbf7b5f41f306009fa2d9af79c52ed31" dependencies = [ "arrow", "datafusion-common", @@ -2522,9 +2148,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "b1210ad73b8b3211aeaf4a42bef9bd7a2b7fce3ec119a478831f18c6ff7f7b93" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2532,20 +2158,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "aaa566a963013a38681ad82a727a654bc7feb19632426aea8c3412d415d200c5" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = 
"ff9aa82b240252a88dee118372f9b9757c545ab9e53c0736bebab2e7da0ef1f2" dependencies = [ "arrow", "chrono", @@ -2553,19 +2179,18 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "log", - "recursive", "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "7d48022b8af9988c1d852644f9e8b5584c490659769a550c5e8d39457a1da0a5" dependencies = [ "ahash", "arrow", @@ -2575,20 +2200,20 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", - "indexmap 2.13.0", + "hashbrown 0.16.1", + "indexmap 2.14.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph 0.8.3", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "ae7a8abc0b4fe624000972a9b145b30b7f1b680bffaa950ea53f78d9b21c27c3" dependencies = [ "arrow", "datafusion-common", @@ -2601,23 +2226,26 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "147253ca3e6b9d59c162de64c02800973018660e13340dd1886dd038d17ac429" dependencies = [ "ahash", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", + "indexmap 2.14.0", "itertools 0.14.0", + "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "689156bb2282107b6239db8d7ef44b4dab10a9b33d3491a0c74acac5e4fedd72" dependencies = [ "arrow", "datafusion-common", @@ -2629,34 +2257,32 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", - "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "68253dc0ee5330aa558b2549c9b0da5af9fc17d753ae73022939014ad616fc28" dependencies = [ "ahash", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", - "indexmap 2.13.0", + "hashbrown 0.16.1", + "indexmap 2.14.0", "itertools 0.14.0", "log", "parking_lot", @@ -2666,12 +2292,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "0fcad240a54d0b1d3e8f668398900260a53122d522b2102ab57218590decacd6" dependencies = [ 
"arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -2684,45 +2309,77 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "f58e83a68bb67007a8fcbf005c44cefe441270c7ee7f6dee10c0e0109b556f6d" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "52.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "be53e9eb55db0fbb8980bb6d87f2435b0524acf4c718ed54a57cabbb299b2ab3" dependencies = [ "arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", - "recursive", "regex", "sqlparser", ] +[[package]] +name = "db4-graph" +version = "0.18.0" +dependencies = [ + "db4-storage", + "raphtory-api", + "raphtory-core", + "rayon", +] + +[[package]] +name = "db4-storage" +version = "0.18.0" +dependencies = [ + "arrow-array", + "arrow-schema", + "bigdecimal", + "boxcar", + "chrono", + "clap", + "dashmap", + "either", + "iter-enum", + "itertools 0.13.0", + "lock_api", + "once_cell", + "parking_lot", + "parquet", + "proptest", + "raphtory-api", + "raphtory-api-macros", + "raphtory-core", + "rayon", + "serde", + "serde_json", + "tempfile", + "test-log", + "thiserror 2.0.18", + "tinyvec", + "tracing", +] + [[package]] name = "deadpool" version = "0.9.5" @@ -2764,9 +2421,9 @@ dependencies = [ [[package]] name = "deflate64" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" +checksum = "ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2" [[package]] name = "delegate" @@ -2786,15 +2443,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", - "pem-rfc7468", - "zeroize", ] [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", @@ -2808,7 +2463,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2829,7 +2484,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2839,18 +2494,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.117", ] [[package]] name = "derive_utils" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" +checksum = "362f47930db19fe7735f527e6595e4900316b893ebf6d48ad3d31be928d57dd6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2872,7 +2548,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", - "const-oid", "crypto-common", "subtle", ] @@ -2898,6 +2573,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "disjoint-sets" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ccbbca7d6a247007ca2535c616d4bb4a5fcad176ef0218671f96b88c52c3d34" + [[package]] name = "display-error-chain" version = "0.2.2" @@ -2912,7 +2593,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2924,6 +2605,15 @@ dependencies = [ "const-random", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "dotenv" version = "0.15.0" @@ -2966,8 +2656,7 @@ checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "dynamic-graphql" version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0466a5cdd914efd439f0904a84a17506ec4757f4abf35f9e46f3f314ee13fe75" +source = "git+https://github.com/miratepuffin/dynamic-graphql?branch=add-arg-descriptions#69a07c5fe3c16b4baf76f676c96cde5865cae1de" dependencies = [ "async-graphql", "dynamic-graphql-derive", @@ -2977,16 +2666,15 @@ dependencies = [ [[package]] name = "dynamic-graphql-derive" version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6027c3698e530bf88b37a618a05fd7a5e761dc2777771d5757ff07103f66189" +source = "git+https://github.com/miratepuffin/dynamic-graphql?branch=add-arg-descriptions#69a07c5fe3c16b4baf76f676c96cde5865cae1de" dependencies = [ "Inflector", "darling 0.20.11", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", - "thiserror 2.0.17", + "syn 2.0.117", + "thiserror 2.0.18", ] [[package]] @@ -2995,6 +2683,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -3004,14 +2698,45 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "env_filter" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" -version = "0.8.4" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ + "humantime", + "is-terminal", "log", "regex", + "termcolor", +] + +[[package]] +name = "env_logger" +version = "0.11.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "log", ] [[package]] @@ -3030,6 +2755,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + [[package]] name = "ethnum" version = "1.5.2" @@ -3102,15 +2833,26 @@ checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "fd-lock" +version = "4.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix 1.1.4", + "windows-sys 0.59.0", +] [[package]] name = "find-msvc-tools" -version = "0.1.7" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -3130,9 +2872,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -3151,6 +2893,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3178,12 +2926,12 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2475ce218217196b161b025598f77e2b405d5e729f7c37bfff145f5df00a41" +checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" 
dependencies = [ "arrow-array", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -3203,9 +2951,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -3218,9 +2966,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -3228,15 +2976,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -3245,9 +2993,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -3261,26 +3009,26 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -3290,9 +3038,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -3302,7 +3050,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - 
"pin-utils", "slab", ] @@ -3323,9 +3070,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "typenum", "version_check", @@ -3353,11 +3100,25 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "rand_core 0.10.1", + "wasip2", + "wasip3", +] + [[package]] name = "glam" version = "0.29.3" @@ -3370,37 +3131,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" -[[package]] -name = "gloo-timers" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "h2" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.13.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.13" @@ -3412,8 +3142,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", - "indexmap 2.13.0", + "http", + "indexmap 2.14.0", "slab", "tokio", "tokio-util", @@ -3446,7 +3176,7 @@ dependencies = [ "pest_derive", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3473,7 +3203,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -3481,6 +3211,17 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" [[package]] name = "hashlink" @@ -3500,7 +3241,7 @@ dependencies = [ "base64 0.22.1", "bytes", "headers-core", - "http 1.4.0", + "http", "httpdate", "mime", "sha1", @@ -3512,7 +3253,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http 1.4.0", + "http", ] [[package]] @@ -3529,9 +3270,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "heed" -version = "0.22.0" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393" +checksum = 
"ad82d6598ccf1dac15c8b758a1bd282b755b6776be600429176757190a1b0202" dependencies = [ "bitflags", "byteorder", @@ -3558,7 +3299,7 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" dependencies = [ - "bincode", + "bincode 1.3.3", "byteorder", "heed-traits", "serde", @@ -3601,17 +3342,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.4.0" @@ -3622,17 +3352,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -3640,7 +3359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.0", + "http", ] [[package]] @@ -3651,8 +3370,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "pin-project-lite", ] @@ -3676,46 +3395,21 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", "futures-channel", "futures-core", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "httparse", "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -3723,35 +3417,19 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "log", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - -[[package]] -name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http 1.4.0", - "hyper 1.8.1", + "http", + "hyper", 
"hyper-util", - "rustls 0.23.36", + "rustls", "rustls-native-certs 0.8.3", - "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", ] [[package]] @@ -3760,7 +3438,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.8.1", + "hyper", "hyper-util", "pin-project-lite", "tokio", @@ -3769,23 +3447,22 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "hyper 1.8.1", + "http", + "http-body", + "hyper", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3802,9 +3479,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -3826,12 +3503,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3839,9 +3517,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3852,9 +3530,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3866,15 +3544,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3886,15 +3564,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = 
"2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3905,6 +3583,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -3945,12 +3629,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "rayon", "serde", "serde_core", @@ -3971,10 +3655,22 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "block-padding", "generic-array", ] +[[package]] +name = "insta" +version = "1.47.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" +dependencies = [ + "console", + "once_cell", + "serde", + "similar", + "tempfile", +] + [[package]] name = "instant" version = "0.1.13" @@ -3995,24 +3691,24 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "inventory" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" dependencies = [ "rustversion", ] [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -4037,9 +3733,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "iter-enum" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c52f2d5e063459674b4735f21870dd911e0d96dbfebb984650068195c2df838" +checksum = "d9e701a443ecfdedddeea9b7975a1875c933f4001f12defa8a7a53e959611308" dependencies = [ "derive_utils", ] @@ -4091,15 +3787,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -4112,20 +3808,20 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -4148,10 +3844,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -4169,9 +3867,9 @@ dependencies = [ [[package]] name = "jsonb" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a901f06163d352fbe41c3c2ff5e08b75330a003cc941e988fb501022f5421e6" +checksum = "eb98fb29636087c40ad0d1274d9a30c0c1e83e03ae93f6e7e89247b37fcc6953" dependencies = [ "byteorder", "ethnum", @@ -4180,26 +3878,11 @@ dependencies = [ "jiff", "nom 8.0.0", "num-traits", - "ordered-float 5.1.0", - "rand 0.9.2", - "ryu", - "serde", - "serde_json", -] - -[[package]] -name = "jsonwebtoken" -version = "9.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", + "ordered-float 5.3.0", + "rand 0.9.4", "serde", "serde_json", - "simple_asn1", + "zmij", ] [[package]] @@ -4221,20 +3904,20 @@ dependencies = [ [[package]] name = "kdam" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5740f66a8d86a086ebcacfb937070e8be6eb2f8fb45e4ae7fa428ca2a98a7b1f" +checksum = "d847be338ef16a13f97637c062d97fb52ebe0ff3b77fa18456d5ed366317e4f7" dependencies = [ "pyo3", "terminal_size", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "lance" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2f0ca022d0424d991933a62d2898864cf5621873962bd84e65e7d1f023f9c36" +checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" dependencies = [ "arrow", "arrow-arith", @@ -4248,11 +3931,10 @@ dependencies = [ "async-recursion", "async-trait", "async_cell", - "aws-credential-types", - "aws-sdk-dynamodb", "byteorder", "bytes", "chrono", + "crossbeam-skiplist", 
"dashmap", "datafusion", "datafusion-expr", @@ -4280,17 +3962,18 @@ dependencies = [ "object_store", "permutation", "pin-project", - "prost", - "prost-types", - "rand 0.9.2", - "roaring", + "prost 0.14.3", + "prost-types 0.14.3", + "rand 0.9.4", + "roaring 0.11.3", "semver", "serde", "serde_json", - "snafu", + "snafu 0.9.0", "tantivy 0.24.2", "tokio", "tokio-stream", + "tokio-util", "tracing", "url", "uuid", @@ -4298,29 +3981,31 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7552f8d528775bf0ab21e1f75dcb70bdb2a828eeae58024a803b5a4655fd9a11" +checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "bytes", + "futures", "getrandom 0.2.17", "half", "jsonb", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "lance-bitpacking" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ea14583cc6fa0bb190bcc2d3bc364b0aa545b345702976025f810e4740e8ce" +checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" dependencies = [ "arrayref", "paste", @@ -4329,9 +4014,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69c752dedd207384892006c40930f898d6634e05e3d489e89763abfe4b9307e7" +checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" dependencies = [ "arrow-array", "arrow-buffer", @@ -4344,6 +4029,7 @@ dependencies = [ "datafusion-sql", "deepsize", "futures", + "itertools 0.13.0", "lance-arrow", "libc", "log", @@ -4352,11 +4038,11 @@ dependencies = [ "num_cpus", "object_store", "pin-project", - "prost", - "rand 0.9.2", - "roaring", + "prost 0.14.3", + "rand 0.9.4", + "roaring 0.11.3", "serde_json", - "snafu", + "snafu 0.9.0", "tempfile", "tokio", "tokio-stream", @@ -4367,9 +4053,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e1e98ca6e5cd337bdda2d9fb66063f295c0c2852d2bc6831366fea833ee608" +checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" dependencies = [ "arrow", "arrow-array", @@ -4390,17 +4076,18 @@ dependencies = [ "lance-datagen", "log", "pin-project", - "prost", - "snafu", + "prost 0.14.3", + "prost-build 0.14.3", + "snafu 0.9.0", "tokio", "tracing", ] [[package]] name = "lance-datagen" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483c643fc2806ed1a2766edf4d180511bbd1d549bcc60373e33f4785c6185891" +checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" dependencies = [ "arrow", "arrow-array", @@ -4410,16 +4097,17 @@ dependencies = [ "futures", "half", "hex", - "rand 0.9.2", + "rand 0.9.4", + "rand_distr 0.5.1", "rand_xoshiro", "random_word", ] [[package]] name = "lance-encoding" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a199d1fa3487529c5ffc433fbd1721231330b9350c2ff9b0c7b7dbdb98f0806a" +checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" dependencies = [ "arrow-arith", "arrow-array", @@ -4442,11 +4130,11 @@ dependencies = [ "log", "lz4", "num-traits", 
- "prost", - "prost-build", - "prost-types", - "rand 0.9.2", - "snafu", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.4", + "snafu 0.9.0", "strum 0.26.3", "tokio", "tracing", @@ -4456,9 +4144,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b57def2279465232cf5a8cd996300c632442e368745768bbed661c7f0a35334b" +checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" dependencies = [ "arrow-arith", "arrow-array", @@ -4480,19 +4168,19 @@ dependencies = [ "log", "num-traits", "object_store", - "prost", - "prost-build", - "prost-types", - "snafu", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "snafu 0.9.0", "tokio", "tracing", ] [[package]] name = "lance-index" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75938c61e986aef8c615dc44c92e4c19e393160a59e2b57402ccfe08c5e63af" +checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" dependencies = [ "arrow", "arrow-arith", @@ -4506,6 +4194,7 @@ dependencies = [ "bitpacking", "bitvec", "bytes", + "chrono", "crossbeam-queue", "datafusion", "datafusion-common", @@ -4530,19 +4219,21 @@ dependencies = [ "lance-table", "libm", "log", - "ndarray", + "ndarray 0.16.1", "num-traits", "object_store", - "prost", - "prost-build", - "prost-types", - "rand 0.9.2", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.4", "rand_distr 0.5.1", + "rangemap", "rayon", - "roaring", + "roaring 0.11.3", "serde", "serde_json", - "snafu", + "smallvec", + "snafu 0.9.0", "tantivy 0.24.2", "tempfile", "tokio", @@ -4553,9 +4244,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa6c3b5b28570d6c951206c5b043f1b35c936928af14fca6f2ac25b0097e4c32" +checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" dependencies = [ "arrow", "arrow-arith", @@ -4567,27 +4258,24 @@ dependencies = [ "arrow-select", "async-recursion", "async-trait", - "aws-config", - "aws-credential-types", "byteorder", "bytes", "chrono", "deepsize", "futures", + "http", "lance-arrow", "lance-core", "lance-namespace", "log", "object_store", - "object_store_opendal", - "opendal", "path_abs", "pin-project", - "prost", - "rand 0.9.2", + "prost 0.14.3", + "rand 0.9.4", "serde", - "shellexpand", - "snafu", + "snafu 0.9.0", + "tempfile", "tokio", "tracing", "url", @@ -4595,9 +4283,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3cbc7e85a89ff9cb3a4627559dea3fd1c1fb16c0d8bc46ede75eefef51eec06" +checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" dependencies = [ "arrow-array", "arrow-buffer", @@ -4608,50 +4296,57 @@ dependencies = [ "lance-arrow", "lance-core", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "lance-namespace" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897dd6726816515bb70a698ce7cda44670dca5761637696d7905b45f405a8cd9" +checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" dependencies = [ "arrow", "async-trait", "bytes", "lance-core", "lance-namespace-reqwest-client", - "snafu", + "serde", + "snafu 
0.9.0", ] [[package]] name = "lance-namespace-impls" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e3cfcd3ba369de2719abf6fb6233f69cda639eb5cbcb328487a790e745ab988" +checksum = "a2392314f3da38f00d166295e44244208a65ccfc256e274fa8631849fc3f4d94" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", + "chrono", + "futures", "lance", "lance-core", + "lance-index", "lance-io", "lance-namespace", + "lance-table", + "log", "object_store", - "reqwest", + "rand 0.9.4", "serde_json", - "snafu", + "snafu 0.9.0", + "tokio", "url", ] [[package]] name = "lance-namespace-reqwest-client" -version = "0.0.18" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea349999bcda4eea53fc05d334b3775ec314761e6a706555c777d7a29b18d19" +checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" dependencies = [ "reqwest", "serde", @@ -4662,9 +4357,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8facc13760ba034b6c38767b16adba85e44cbcbea8124dc0c63c43865c60630" +checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" dependencies = [ "arrow", "arrow-array", @@ -4672,8 +4367,6 @@ dependencies = [ "arrow-ipc", "arrow-schema", "async-trait", - "aws-credential-types", - "aws-sdk-dynamodb", "byteorder", "bytes", "chrono", @@ -4685,16 +4378,16 @@ dependencies = [ "lance-io", "log", "object_store", - "prost", - "prost-build", - "prost-types", - "rand 0.9.2", + "prost 0.14.3", + "prost-build 0.14.3", + "prost-types 0.14.3", + "rand 0.9.4", "rangemap", - "roaring", + "roaring 0.11.3", "semver", "serde", "serde_json", - "snafu", + "snafu 0.9.0", "tokio", "tracing", "url", @@ -4703,22 +4396,22 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.39.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05052ef86188d6ae6339bdd9f2c5d77190e8ad1158f3dc8a42fa91bde9e5246" +checksum = "7ed7119bdd6983718387b4ac44af873a165262ca94f181b104cd6f97912eb3bf" dependencies = [ "arrow-array", "arrow-schema", "lance-arrow", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "lancedb" -version = "0.22.3" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1da241266792d8caa58005a3deb06ba1388a99350d89b5c904ef6f8de5d936f" +checksum = "ce0f4d7f739dc30608fe8b202cbb40986c2937e1a5a189f98fb06d7b8543156a" dependencies = [ "ahash", "arrow", @@ -4737,7 +4430,10 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", "datafusion-physical-plan", + "datafusion-sql", "futures", "half", "lance", @@ -4760,13 +4456,13 @@ dependencies = [ "num-traits", "object_store", "pin-project", - "rand 0.9.2", + "rand 0.9.4", "regex", "semver", "serde", "serde_json", "serde_with", - "snafu", + "snafu 0.8.9", "tempfile", "tokio", "url", @@ -4778,9 +4474,12 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -dependencies = [ - "spin", -] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] 
name = "levenshtein_automata" @@ -4845,31 +4544,24 @@ dependencies = [ "lexical-util", ] -[[package]] -name = "libbz2-rs-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" - [[package]] name = "libc" -version = "0.2.180" +version = "0.2.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.12" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ - "bitflags", "libc", ] @@ -4881,21 +4573,27 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "litrs" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "lmdb-master-sys" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df" +checksum = "aaeb9bd22e73bd1babffff614994b341e9b2008de7bb73bf1f7e9154f1978f8b" dependencies = [ "cc", "doxygen-rs", @@ -4967,9 +4665,15 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" + +[[package]] +name = "lz4_flex" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] @@ -4997,9 +4701,9 @@ dependencies = [ [[package]] name = "marrow" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea734fcb7619dfcc47a396f7bf0c72571ccc8c18ae7236ae028d485b27424b74" +checksum = "f5240d6977234968ff9ad254bfa73aa397fb51e41dcb22b1eb85835e9295485b" dependencies = [ "arrow-array", "arrow-buffer", @@ -5075,19 +4779,25 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] +[[package]] +name = "memo-map" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" + [[package]] name = "memoffset" version = "0.9.1" @@ -5115,18 +4825,19 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.14.0" +version = "2.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ea9ac0a51fb5112607099560fdf0f90366ab088a2a9e6e8ae176794e9806aa" +checksum = "805bfd7352166bae857ee569628b52bcd85a1cecf7810861ebceb1686b72b75d" dependencies = [ + "memo-map", "serde", ] [[package]] name = "minijinja-contrib" -version = "2.14.0" +version = "2.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be6ad8bbc21c256d5f2f5494699d5d69d519b8510d672a0e43b7bfa3a56c388a" +checksum = "45092d80391870622fcf3bd82f5d2af18f99533ea60debb4bc9db0c76f0e809a" dependencies = [ "minijinja", "serde", @@ -5151,9 +4862,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -5168,9 +4879,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -5195,7 +4906,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 1.4.0", + "http", "httparse", "memchr", "mime", @@ -5230,6 +4941,21 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "ndarray" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + [[package]] name = "neo4rs" version = "0.8.0" @@ -5253,7 +4979,7 @@ dependencies = [ "serde", "thiserror 1.0.69", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "url", "webpki-roots 0.26.11", ] @@ -5265,7 +4991,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a0d57c55d2d1dc62a2b1d16a0a1079eb78d67c36bdf468d582ab4482ec7002" dependencies = [ "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags", + "cfg-if", + "libc", ] [[package]] @@ -5332,22 +5078,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-bigint-dig" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" -dependencies = [ - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "smallvec", - "zeroize", -] - [[package]] name = "num-complex" version = "0.4.6" @@ -5359,9 +5089,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -5431,60 +5161,37 @@ dependencies = [ [[package]] name = "numpy" -version = "0.25.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f1dee9aa8d3f6f8e8b9af3803006101bb3653866ef056d530d53ae68587191" +checksum = "7aac2e6a6e4468ffa092ad43c39b81c79196c2bb773b8db4085f695efe3bba17" dependencies = [ "half", "libc", - "ndarray", + "ndarray 0.17.2", "num-complex", "num-integer", "num-traits", "pyo3", "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", + "rustc-hash 2.1.2", ] [[package]] name = "object_store" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", - "base64 0.22.1", "bytes", "chrono", - "form_urlencoded", "futures", - "http 1.4.0", - "http-body-util", - "httparse", + "http", "humantime", - "hyper 1.8.1", "itertools 0.14.0", - "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", - "reqwest", - "ring", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -5493,26 +5200,11 @@ dependencies = [ "web-time", ] -[[package]] -name = "object_store_opendal" -version = "0.54.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0b88fc0e0c4890c1d99e2b8c519c5db40f7d9b69a0f562ff1ad4967a4c8bbc6" -dependencies = [ - "async-trait", - "bytes", - "futures", - "object_store", - "opendal", - "pin-project", - "tokio", -] - [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -5522,9 +5214,9 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oneshot" -version = "0.1.11" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" +checksum = 
"269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" [[package]] name = "oorandom" @@ -5532,35 +5224,6 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" -[[package]] -name = "opendal" -version = "0.54.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" -dependencies = [ - "anyhow", - "backon", - "base64 0.22.1", - "bytes", - "chrono", - "crc32c", - "futures", - "getrandom 0.2.17", - "http 1.4.0", - "http-body 1.0.1", - "log", - "md-5", - "percent-encoding", - "quick-xml 0.38.4", - "reqsign", - "reqwest", - "serde", - "serde_json", - "sha2", - "tokio", - "uuid", -] - [[package]] name = "openssl-probe" version = "0.1.6" @@ -5569,9 +5232,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-probe" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "opentelemetry" @@ -5595,11 +5258,11 @@ checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76" dependencies = [ "async-trait", "futures-core", - "http 1.4.0", + "http", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", - "prost", + "prost 0.13.5", "thiserror 1.0.69", "tokio", "tonic", @@ -5614,7 +5277,7 @@ checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost", + "prost 0.13.5", "tonic", ] @@ -5639,6 +5302,19 @@ dependencies = [ "tracing", ] +[[package]] +name = "optd-core" +version = "0.18.0" +dependencies = [ + "anyhow", + "bitvec", + "itertools 0.14.0", + "pretty-xmlish", + "snafu 0.8.9", + "tokio", + "tracing", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -5665,9 +5341,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "5.1.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" dependencies = [ "num-traits", ] @@ -5703,15 +5379,9 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.114", + "syn 2.0.117", ] -[[package]] -name = "outref" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" - [[package]] name = "ownedbytes" version = "0.7.0" @@ -5771,9 +5441,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", @@ -5784,24 +5454,21 @@ dependencies = [ "arrow-schema", "arrow-select", "base64 0.22.1", - "brotli 8.0.2", + "brotli", "bytes", "chrono", "flate2", - "futures", "half", "hashbrown 0.16.1", - "lz4_flex", - "num", + "lz4_flex 0.12.1", "num-bigint", - "object_store", + "num-integer", + "num-traits", "paste", - "ring", "seq-macro", "simdutf8", "snap", "thrift", - "tokio", 
"twox-hash", "zstd", ] @@ -5859,15 +5526,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "pem-rfc7468" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" -dependencies = [ - "base64ct", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -5882,9 +5540,9 @@ checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" [[package]] name = "pest" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -5892,9 +5550,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -5902,22 +5560,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pest_meta" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", @@ -5930,7 +5588,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.13.0", + "indexmap 2.14.0", ] [[package]] @@ -5941,7 +5599,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", ] @@ -5994,7 +5652,7 @@ dependencies = [ "phf_shared 0.11.3", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6017,79 +5675,35 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkcs1" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" -dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs5" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" -dependencies = [ - "aes", - "cbc", - "der", - "pbkdf2", - "scrypt", - "sha2", - "spki", -] - -[[package]] -name = "pkcs8" -version = "0.10.2" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "pkcs5", - "rand_core 0.6.4", - "spki", -] +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "plotters" @@ -6131,14 +5745,14 @@ dependencies = [ "futures-util", "headers", "hex", - "http 1.4.0", + "http", "http-body-util", "httpdate", - "hyper 1.8.1", + "hyper", "hyper-util", "mime", "mime_guess", - "nix", + "nix 0.30.1", "parking_lot", "percent-encoding", "pin-project-lite", @@ -6151,7 +5765,7 @@ dependencies = [ "serde_urlencoded", "smallvec", "sync_wrapper", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-tungstenite", "tokio-util", @@ -6168,7 +5782,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6181,34 +5795,30 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.1.3", + "rustix 1.1.4", "windows-sys 0.61.2", ] -[[package]] -name = "pometry-storage" -version = "0.18.0" - [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -6228,6 +5838,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "pretty-xmlish" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" + [[package]] name = "pretty_assertions" version = "1.4.1" @@ -6245,23 +5861,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit 0.23.10+spec-1.0.0", + "toml_edit 0.25.11+spec-1.1.0", ] [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -6274,22 +5890,22 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "version_check", "yansi", ] [[package]] name = "proptest" -version = "1.9.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bit-set", "bit-vec", "bitflags", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_chacha 0.9.0", "rand_xorshift", "regex-syntax", @@ -6300,13 +5916,13 @@ dependencies = [ [[package]] name = "proptest-derive" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee1c9ac207483d5e7db4940700de86a9aae46ef90c48b57f99fe7edb8345e49" +checksum = "095a99f75c69734802359b682be8daaf8980296731f6470434ea2c652af1dd30" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6316,7 +5932,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive 0.14.3", ] [[package]] @@ -6332,10 +5958,29 @@ dependencies = [ "once_cell", "petgraph 0.7.1", "prettyplease", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", + "regex", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-build" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" +dependencies = [ + "heck 0.5.0", + "itertools 0.14.0", + "log", + "multimap", + "petgraph 0.8.3", + "prettyplease", + "prost 0.14.3", + "prost-types 0.14.3", "regex", - "syn 2.0.114", + "syn 2.0.117", "tempfile", ] @@ -6349,7 +5994,20 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -6358,28 +6016,27 @@ version = "0.13.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost", + "prost 0.13.5", ] [[package]] -name = "psm" -version = "0.1.28" +name = "prost-types" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "ar_archive_writer", - "cc", + "prost 0.14.3", ] [[package]] name = "pyo3" -version = "0.25.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d" dependencies = [ "chrono", "chrono-tz 0.10.4", - "indexmap 2.13.0", + "indexmap 2.14.0", "indoc", "inventory", "libc", @@ -6394,9 +6051,9 @@ dependencies = [ [[package]] name = "pyo3-arrow" -version = "0.11.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8454903e6b8406a98c9210e828f85af167aef1816208a1c04e837185a49eee5b" +checksum = "36b9f03cb749b0326951ebb30e39eda2f32b0b9205dce67e947e65779b8faffc" dependencies = [ "arrow-array", "arrow-buffer", @@ -6404,8 +6061,10 @@ dependencies = [ "arrow-data", "arrow-schema", "arrow-select", + "chrono", + "chrono-tz 0.10.4", "half", - "indexmap 2.13.0", + "indexmap 2.14.0", "numpy", "pyo3", "thiserror 1.0.69", @@ -6413,19 +6072,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089" dependencies = [ "libc", "pyo3-build-config", @@ -6433,27 +6091,37 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "pythonize" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3a8f29db331e28c332c63496cfcbb822aca3d7320bc08b655d7fd0c29c50ede" +dependencies = [ + "pyo3", + "serde", ] [[package]] @@ -6468,46 +6136,15 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quick-xml" -version = "0.37.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "quick-xml" -version = "0.38.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger", - "log", - "rand 0.8.5", -] - -[[package]] -name = "quickcheck_macros" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a" +checksum = "95c589f335db0f6aaa168a7cd27b1fc6920f5e1470c804f814d9cd6e62a0f70b" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.114", + "env_logger 0.11.10", + "log", + "rand 0.10.1", ] [[package]] @@ -6521,10 +6158,10 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", - "rustls 0.23.36", - "socket2 0.6.1", - "thiserror 2.0.17", + "rustc-hash 2.1.2", + "rustls", + "socket2 0.6.3", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -6532,20 +6169,20 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", - "rustc-hash 2.1.1", - "rustls 0.23.36", + "rustc-hash 2.1.2", + "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -6560,16 +6197,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.43" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -6580,12 +6217,28 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radium" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -6599,14 +6252,24 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" 
+version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", ] +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "getrandom 0.4.2", + "rand_core 0.10.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -6645,6 +6308,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + [[package]] name = "rand_distr" version = "0.4.3" @@ -6662,7 +6331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -6690,9 +6359,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e47a395bdb55442b883c89062d6bcff25dc90fa5f8369af81e0ac6d49d78cf81" dependencies = [ "ahash", - "brotli 8.0.2", + "brotli", "paste", - "rand 0.9.2", + "rand 0.9.4", "unicase", ] @@ -6709,18 +6378,15 @@ dependencies = [ "ahash", "arrow", "arrow-array", - "arrow-buffer", "arrow-csv", - "arrow-data", - "arrow-ipc", "arrow-json", "arrow-schema", "arrow-select", "async-openai", "async-trait", - "axum 0.8.8", + "axum 0.8.9", "bigdecimal", - "bincode", + "bincode 2.0.1", "bytemuck", "bzip2 0.4.4", "chrono", @@ -6730,15 +6396,18 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "db4-graph", + "db4-storage", + "disjoint-sets", "display-error-chain", "dotenv", "either", "flate2", "futures-util", "glam", - "hashbrown 0.15.5", + "hashbrown 0.14.5", "heed", - "indexmap 2.13.0", + "indexmap 2.14.0", "indoc", "iter-enum", "itertools 0.13.0", @@ -6758,20 +6427,18 @@ dependencies = [ "ouroboros", "parking_lot", "parquet", - "pometry-storage", "pretty_assertions", "proptest", "proptest-derive", - "prost", - "prost-build", - "prost-types", + "prost 0.13.5", + "prost-build 0.13.5", + "prost-types 0.13.5", "pyo3", "pyo3-arrow", + "pythonize", "quad-rand", - "quickcheck", - "quickcheck_macros", - "rand 0.8.5", - "rand_distr 0.4.3", + "rand 0.9.4", + "rand_distr 0.5.1", "raphtory", "raphtory-api", "raphtory-core", @@ -6779,8 +6446,8 @@ dependencies = [ "rayon", "regex", "reqwest", - "roaring", - "rustc-hash 2.1.1", + "roaring 0.10.12", + "rustc-hash 2.1.2", "serde", "serde_arrow", "serde_json", @@ -6788,7 +6455,8 @@ dependencies = [ "strsim", "tantivy 0.22.1", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", + "tikv-jemallocator", "tokio", "tracing", "uuid", @@ -6801,12 +6469,14 @@ name = "raphtory-api" version = "0.18.0" dependencies = [ "arrow-array", + "arrow-buffer", "arrow-ipc", "arrow-schema", "bigdecimal", "bytemuck", "chrono", "dashmap", + "derive_more", "display-error-chain", "iter-enum", "itertools 0.13.0", @@ -6817,18 +6487,32 @@ dependencies = [ "proptest", "pyo3", "pyo3-arrow", - "rand 0.8.5", + "rand 0.9.4", "rayon", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "serde", + "serde_arrow", "serde_json", "sorted_vector_map", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", 
"tracing-subscriber", "twox-hash", ] +[[package]] +name = "raphtory-api-macros" +version = "0.18.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "raphtory-auth-noop" +version = "0.18.0" + [[package]] name = "raphtory-benchmark" version = "0.18.0" @@ -6839,7 +6523,7 @@ dependencies = [ "fake", "itertools 0.13.0", "once_cell", - "rand 0.8.5", + "rand 0.9.4", "raphtory", "raphtory-api", "rayon", @@ -6854,10 +6538,15 @@ dependencies = [ name = "raphtory-core" version = "0.18.0" dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "bigdecimal", "chrono", "dashmap", "either", + "hashbrown 0.14.5", "iter-enum", "itertools 0.13.0", "lock_api", @@ -6869,39 +6558,9 @@ dependencies = [ "raphtory-api", "rayon", "regex", - "rustc-hash 2.1.1", - "serde", - "thiserror 2.0.17", -] - -[[package]] -name = "raphtory-cypher" -version = "0.18.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "async-trait", - "clap", - "datafusion", - "futures", - "itertools 0.13.0", - "lazy_static", - "pest", - "pest_derive", - "pometry-storage", - "pretty_assertions", - "proptest", - "raphtory", - "rayon", + "rustc-hash 2.1.2", "serde", - "serde_json", - "sqlparser", - "tempfile", - "thiserror 2.0.17", - "tokio", - "tracing", + "thiserror 2.0.18", ] [[package]] @@ -6914,6 +6573,7 @@ dependencies = [ "async-graphql-poem", "base64 0.22.1", "base64-compat", + "bigdecimal", "chrono", "clap", "config", @@ -6921,7 +6581,7 @@ dependencies = [ "dynamic-graphql", "futures-util", "itertools 0.13.0", - "jsonwebtoken 10.3.0", + "jsonwebtoken", "minijinja", "moka", "once_cell", @@ -6939,14 +6599,14 @@ dependencies = [ "rayon", "reqwest", "rust-embed", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "serde", "serde_json", "spki", "strum 0.27.2", "strum_macros 0.27.2", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "tracing-opentelemetry", @@ -6956,14 +6616,35 @@ dependencies = [ "zip", ] +[[package]] +name = "raphtory-itertools" +version = "0.18.0" +dependencies = [ + "criterion", + "itertools 0.13.0", + "proptest", + "rand 0.9.4", +] + [[package]] name = "raphtory-pymodule" version = "0.18.0" dependencies = [ + "clam-core", "pyo3", "pyo3-build-config", "raphtory", + "raphtory-auth-noop", + "raphtory-graphql", +] + +[[package]] +name = "raphtory-server" +version = "0.18.0" +dependencies = [ + "raphtory-auth-noop", "raphtory-graphql", + "tokio", ] [[package]] @@ -6973,18 +6654,20 @@ dependencies = [ "arrow-array", "arrow-schema", "bigdecimal", + "db4-graph", + "db4-storage", "iter-enum", "itertools 0.13.0", "num-traits", "parking_lot", - "pometry-storage", "proptest", "raphtory-api", + "raphtory-api-macros", "raphtory-core", "rayon", "serde", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -7017,9 +6700,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -7035,26 +6718,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ 
- "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.114", -] - [[package]] name = "redox_syscall" version = "0.5.18" @@ -7072,7 +6735,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -7092,14 +6755,14 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -7109,58 +6772,20 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] -[[package]] -name = "regex-lite" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" - [[package]] name = "regex-syntax" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" - -[[package]] -name = "reqsign" -version = "0.16.5" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.22.1", - "chrono", - "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", - "http 1.4.0", - "jsonwebtoken 9.3.1", - "log", - "once_cell", - "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rsa", - "rust-ini 0.21.3", - "serde", - "serde_json", - "sha1", - "sha2", - "tokio", -] +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -7174,12 +6799,12 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-util", "js-sys", "log", @@ -7188,7 +6813,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.36", + "rustls", "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", @@ -7196,7 +6821,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", "tower 0.5.3", "tower-http", @@ -7206,7 +6831,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", ] [[package]] @@ -7264,6 +6889,16 @@ dependencies = [ "byteorder", ] +[[package]] +name = "roaring" +version = "0.11.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "ron" version = "0.8.1" @@ -7276,32 +6911,11 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "rsa" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" -dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core 0.6.4", - "sha2", - "signature", - "spki", - "subtle", - "zeroize", -] - [[package]] name = "rust-embed" -version = "8.10.0" +version = "8.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f783a9e226b5319beefe29d45941f559ace8b56801bb8355be17eea277fc8272" +checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -7310,23 +6924,23 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.10.0" +version = "8.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "303d4e979140595f1d824b3dd53a32684835fa32425542056826521ac279f538" +checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa" dependencies = [ "proc-macro2", "quote", "rust-embed-utils", "shellexpand", - "syn 2.0.114", + "syn 2.0.117", "walkdir", ] [[package]] name = "rust-embed-utils" -version = "8.10.0" +version = "8.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6b4ab509cae251bd524d2425d746b0af0018f5a81fc1eaecdd4e661c8ab3a0" +checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1" dependencies = [ "sha2", "walkdir", @@ -7354,16 +6968,6 @@ dependencies = [ "ordered-multimap", ] -[[package]] -name = "rust-ini" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" -dependencies = [ - "cfg-if", - "ordered-multimap", -] - [[package]] name = "rust-stemmers" version = "1.2.0" @@ -7382,9 +6986,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -7410,40 +7014,27 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - -[[package]] -name = "rustls" -version = "0.23.36" +version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ - "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki", "subtle", "zeroize", ] @@ -7467,10 +7058,10 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.0", + "openssl-probe 0.2.1", "rustls-pki-types", "schannel", - "security-framework 3.5.1", + "security-framework 3.7.0", ] [[package]] @@ -7484,9 +7075,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -7494,21 +7085,10 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted 0.9.0", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.8" +version = "0.103.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" dependencies = [ - "aws-lc-rs", "ring", "rustls-pki-types", "untrusted 0.9.0", @@ -7533,19 +7113,32 @@ dependencies = [ ] [[package]] -name = "ryu" -version = "1.0.22" +name = "rustyline" +version = "13.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "02a2d683a4ac90aeef5b1013933f6d977bd37d51ff3f4dad829d4931a7e6be86" +dependencies = [ + "bitflags", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix 0.27.1", + "radix_trie", + "unicode-segmentation", + "unicode-width 0.1.14", + "utf8parse", + "winapi", +] [[package]] -name = "salsa20" -version = "0.10.2" +name = "ryu" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" -dependencies = [ - "cipher", -] +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -7558,9 +7151,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -7579,9 +7172,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -7593,34 +7186,13 @@ dependencies = [ name = "scoped-tls" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "scrypt" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" -dependencies = [ - "pbkdf2", - "salsa20", - "sha2", -] +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" [[package]] -name = "sct" -version = "0.7.1" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted 0.9.0", -] +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "secrecy" @@ -7647,9 +7219,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.5.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags", "core-foundation 0.10.1", @@ -7660,9 +7232,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -7670,9 +7242,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "seq-macro" @@ -7722,7 +7294,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7757,7 +7329,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7783,17 +7355,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + "indexmap 2.14.0", "schemars 0.9.0", - "schemars 1.2.0", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -7802,14 +7374,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] 
@@ -7819,7 +7391,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -7830,7 +7402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -7845,9 +7417,9 @@ dependencies = [ [[package]] name = "shellexpand" -version = "3.1.1" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" dependencies = [ "dirs", ] @@ -7874,15 +7446,14 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ - "digest", "rand_core 0.6.4", ] [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "simdutf8" @@ -7890,6 +7461,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "simple_asn1" version = "0.6.4" @@ -7898,15 +7475,15 @@ checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", ] [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "sketches-ddsketch" @@ -7919,18 +7496,27 @@ dependencies = [ [[package]] name = "sketches-ddsketch" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" dependencies = [ "serde", ] [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "slotmap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "bdd58c3c93c3d278ca835519292445cb4b0d4dc59ccfdf7ceadaab3f8aeb4038" +dependencies = [ + "version_check", +] [[package]] name = "smallvec" @@ -7944,7 +7530,16 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" dependencies = [ - "snafu-derive", + "snafu-derive 0.8.9", +] + 
+[[package]] +name = "snafu" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1d4bced6a69f90b2056c03dcff2c4737f98d6fb9e0853493996e1d253ca29c6" +dependencies = [ + "snafu-derive 0.9.0", ] [[package]] @@ -7956,7 +7551,19 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "snafu-derive" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -7965,6 +7572,15 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "snb" +version = "0.18.0" +dependencies = [ + "chrono", + "flate2", + "raphtory", +] + [[package]] name = "socket2" version = "0.5.10" @@ -7977,12 +7593,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -8007,18 +7623,16 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ - "base64ct", "der", ] [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", - "recursive", "sqlparser_derive", ] @@ -8030,7 +7644,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8039,19 +7653,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" -[[package]] -name = "stacker" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "static_assertions" version = "1.1.0" @@ -8119,7 +7720,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8131,7 +7732,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8153,9 +7754,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -8188,7 +7789,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8220,7 +7821,7 @@ dependencies 
= [ "levenshtein_automata", "log", "lru", - "lz4_flex", + "lz4_flex 0.11.6", "measure_time 0.8.3", "memmap2", "num_cpus", @@ -8273,7 +7874,7 @@ dependencies = [ "levenshtein_automata", "log", "lru", - "lz4_flex", + "lz4_flex 0.11.6", "measure_time 0.9.0", "memmap2", "once_cell", @@ -8281,10 +7882,10 @@ dependencies = [ "rayon", "regex", "rust-stemmers", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "serde", "serde_json", - "sketches-ddsketch 0.3.0", + "sketches-ddsketch 0.3.1", "smallvec", "tantivy-bitpacker 0.8.0", "tantivy-columnar 0.5.0", @@ -8294,7 +7895,7 @@ dependencies = [ "tantivy-stacker 0.5.0", "tantivy-tokenizer-api 0.5.0", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "uuid", "winapi", @@ -8481,31 +8082,62 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "target-lexicon" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", - "rustix 1.1.3", + "rustix 1.1.4", "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "terminal_size" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ - "rustix 1.1.3", - "windows-sys 0.60.2", + "rustix 1.1.4", + "windows-sys 0.61.2", +] + +[[package]] +name = "test-log" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" +dependencies = [ + "env_logger 0.11.10", + "test-log-macros", + "tracing-subscriber", +] + +[[package]] +name = "test-log-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be35209fd0781c5401458ab66e4f98accf63553e8fae7425503e92fdd319783b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -8519,11 +8151,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -8534,18 +8166,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8577,6 +8209,26 @@ dependencies = [ "ordered-float 2.10.1", ] +[[package]] +name = "tikv-jemalloc-sys" +version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "tikv-jemallocator" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a" +dependencies = [ + "libc", + "tikv-jemalloc-sys", +] + [[package]] name = "time" version = "0.3.47" @@ -8619,9 +8271,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -8639,10 +8291,11 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ + "serde_core", "tinyvec_macros", ] @@ -8654,9 +8307,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.49.0" +version = "1.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "a91135f59b1cbf38c91e73cf3386fca9bb77915c45ce2771460c9d92f0f3d776" dependencies = [ "bytes", "libc", @@ -8664,30 +8317,21 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2 0.6.3", "tokio-macros", + "tracing", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", -] - -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", + "syn 2.0.117", ] [[package]] @@ -8696,7 +8340,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.36", + "rustls", "tokio", ] @@ -8760,9 +8404,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ "serde_core", ] @@ 
-8773,33 +8417,33 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", "serde_spanned", "toml_datetime 0.6.11", "toml_write", - "winnow", + "winnow 0.7.15", ] [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ - "indexmap 2.13.0", - "toml_datetime 0.7.5+spec-1.1.0", + "indexmap 2.14.0", + "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", - "winnow", + "winnow 1.0.1", ] [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow", + "winnow 1.0.1", ] [[package]] @@ -8819,16 +8463,16 @@ dependencies = [ "axum 0.7.9", "base64 0.22.1", "bytes", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-timeout", "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.5", "socket2 0.5.10", "tokio", "tokio-stream", @@ -8885,8 +8529,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "iri-string", "pin-project-lite", @@ -8929,7 +8573,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8973,9 +8617,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -8989,6 +8633,27 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tracing-test" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a4c448db514d4f24c5ddb9f73f2ee71bfb24c526cf0c570ba142d1119e0051" +dependencies = [ + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad06847b7afb65c7866a36664b75c40b895e318cea4f71299f013fb22965329d" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -9003,12 +8668,12 @@ checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d" dependencies = [ "bytes", "data-encoding", - "http 1.4.0", + "http", "httparse", "log", - "rand 0.9.2", + "rand 0.9.4", "sha1", - "thiserror 2.0.17", + "thiserror 2.0.18", "utf-8", ] @@ -9018,7 +8683,7 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -9056,15 +8721,21 @@ checksum = 
"dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" @@ -9072,6 +8743,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unindent" version = "0.2.4" @@ -9090,6 +8767,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "url" version = "2.5.8" @@ -9102,12 +8785,6 @@ dependencies = [ "serde", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf-8" version = "0.7.6" @@ -9134,11 +8811,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.19.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -9157,10 +8834,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] -name = "vsimd" -version = "0.8.0" +name = "virtue" +version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" [[package]] name = "wait-timeout" @@ -9198,18 +8875,27 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" dependencies = [ "cfg-if", "once_cell", @@ -9220,22 +8906,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -9243,26 +8926,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.14.0", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -9276,11 +8981,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap 2.14.0", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" dependencies = [ "js-sys", "wasm-bindgen", @@ -9302,14 +9019,14 @@ version = "0.26.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", ] [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -9372,7 +9089,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9383,7 +9100,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9577,24 +9294,115 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap 2.14.0", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap 2.14.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + 
"indexmap 2.14.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "wyz" @@ -9605,12 +9413,6 @@ dependencies = [ "tap", ] -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "xxhash-rust" version = "0.8.15" @@ -9645,9 +9447,9 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -9656,54 +9458,54 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.33" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.33" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] @@ -9724,14 +9526,14 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ 
-9740,9 +9542,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -9751,13 +9553,13 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9777,12 +9579,12 @@ dependencies = [ "flate2", "getrandom 0.3.4", "hmac", - "indexmap 2.13.0", + "indexmap 2.14.0", "lzma-rs", "memchr", "pbkdf2", "sha1", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "xz2", "zeroize", @@ -9792,15 +9594,15 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" [[package]] name = "zmij" -version = "1.0.14" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zopfli" diff --git a/Cargo.toml b/Cargo.toml index 6ae4198d63..eb7e7e59a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,18 +1,25 @@ [workspace] members = [ "raphtory", - "raphtory-cypher", "raphtory-benchmark", "examples/rust", "examples/netflow", "examples/custom-gql-apis", "python", "raphtory-graphql", + "raphtory-auth-noop", + "raphtory-server", "raphtory-api", "raphtory-core", "raphtory-storage", + "raphtory-api-macros", + "raphtory-itertools", + "clam-core", + "clam-core/snb", + "raphtory-itertools" ] default-members = ["raphtory"] +exclude = ["optd"] resolver = "2" [workspace.package] @@ -30,7 +37,8 @@ edition = "2021" # debug symbols are using a lot of resources [profile.dev] split-debuginfo = "unpacked" -debug = true +debug = 1 +opt-level = 1 [profile.release-with-debug] inherits = "release" @@ -46,138 +54,158 @@ debug = true inherits = "dev" debug = false incremental = false +opt-level = 1 # much smaller files [workspace.dependencies] -#[public-storage] -pometry-storage = { version = ">=0.8.1", path = "pometry-storage" } -#[private-storage] -# pometry-storage = { path = "pometry-storage-private", package = "pometry-storage-private" } -raphtory = { path = "raphtory", version = "0.18.0" } -raphtory-api = { path = "raphtory-api", version = "0.18.0" } -raphtory-core = { path = "raphtory-core", version = "0.18.0" } -raphtory-storage = { path = "raphtory-storage", version = "0.18.0" } -raphtory-graphql = { path = "raphtory-graphql", version = "0.18.0" } -async-graphql = { version = "7.0.16", features = ["dynamic-schema"] } -bincode = "1.3.3" -async-graphql-poem = "7.0.16" -dynamic-graphql = "0.10.1" +db4-graph = { version = "0.18.0", path = "db4-graph", default-features = false } +db4-storage = { version = "0.18.0", path = "db4-storage" } +raphtory = { version = "0.18.0", path = "raphtory", default-features = false } +raphtory-api = { version = "0.18.0", 
path = "raphtory-api", default-features = false } +raphtory-api-macros = { version = "0.18.0", path = "raphtory-api-macros", default-features = false } +raphtory-core = { version = "0.18.0", path = "raphtory-core", default-features = false } +raphtory-graphql = { version = "0.18.0", path = "raphtory-graphql", default-features = false } +raphtory-storage = { version = "0.18.0", path = "raphtory-storage", default-features = false } +raphtory-itertools = { version = "0.18.0", path = "raphtory-itertools" } +clam-core = { version = "0.18.0", path = "clam-core" } +optd-core = { version = "0.18.0", path = "optd/optd/core" } +async-graphql = { version = "7.2.1", features = ["dynamic-schema"] } +bincode = { version = "2", features = ["serde"] } +async-graphql-poem = "7.2.1" +dynamic-graphql = { git = "https://github.com/miratepuffin/dynamic-graphql", branch = "add-arg-descriptions" } +derive_more = "2.1.1" +tikv-jemallocator = "0.6.1" reqwest = { version = "0.12.28", default-features = false, features = [ "rustls-tls", "multipart", "json", ] } -iter-enum = { version = "1.2.0", features = ["rayon"] } -serde = { version = "1.0.197", features = ["derive", "rc"] } -serde_json = "1.0.114" -pyo3 = { version = "0.25.1", features = ["multiple-pymethods", "chrono"] } -pyo3-build-config = "0.25.1" -pyo3-arrow = "0.11.0" -numpy = "0.25.0" +boxcar = "0.2.14" +iter-enum = { version = "1.2.1", features = ["rayon"] } +serde = { version = "1.0.228", features = ["derive", "rc"] } +serde_json = { version = "1.0.149", features = ["float_roundtrip"] } +pyo3 = { version = "0.27.2", features = ["multiple-pymethods", "chrono"] } +pyo3-build-config = "0.27.2" +pyo3-arrow = "0.15.0" +numpy = "0.27.1" itertools = "0.13.0" -rand = "0.8.5" -rayon = "1.8.1" -roaring = "0.10.6" +rand = "0.9.2" +rayon = "1.11.0" +roaring = "0.10.12" sorted_vector_map = "0.2.0" -tokio = { version = "1.43.1", features = ["full"] } -once_cell = "1.19.0" -parking_lot = { version = "0.12.1", features = [ +tokio = { version = "1.50.0", features = ["full"] } +once_cell = "1.21.4" +parking_lot = { version = "0.12.5", features = [ "serde", "arc_lock", "send_guard", ] } -ordered-float = "4.2.0" -chrono = { version = "0.4.41", features = ["serde"] } -tempfile = "3.10.0" -futures-util = "0.3.30" -thiserror = "2.0.0" +ordered-float = "4.6.0" +chrono = { version = "0.4.44", features = ["serde"] } +chrono-tz = "0.10.4" +tempfile = "3.27.0" +futures-util = "0.3.32" +thiserror = "2.0.18" dotenv = "0.15.0" -csv = "1.3.0" -flate2 = "1.0.28" -regex = "1.10.3" -num-traits = "0.2.18" +csv = "1.4.0" +flate2 = "1.1.9" +regex = "1.12.3" +num-traits = "0.2.19" num-integer = "0.1" -rand_distr = "0.4.3" -rustc-hash = "2.0.0" -twox-hash = "2.1.0" -lock_api = { version = "0.4.11", features = ["arc_lock", "serde"] } -dashmap = { version = "6.0.1", features = ["serde", "rayon"] } -glam = "0.29.0" -quad-rand = "0.2.1" -zip = "2.3.0" +rand_distr = "0.5.1" +rustc-hash = "2.1.1" +twox-hash = "2.1.2" +tinyvec = { version = "1.11", features = ["serde", "alloc"] } +lock_api = { version = "0.4.14", features = ["arc_lock", "serde"] } +dashmap = { version = "6.1.0", features = ["serde", "rayon"] } +glam = "0.29.3" +quad-rand = "0.2.3" +zip = "2.4.2" neo4rs = "0.8.0" bzip2 = "0.4.4" tantivy = "0.22.1" -async-trait = "0.1.77" +async-trait = "0.1.89" async-openai = "0.28.3" -num = "0.4.1" -display-error-chain = "0.2.0" -bigdecimal = { version = "0.4.7", features = ["serde"] } -kdam = "0.6.3" -hashbrown = "0.15.1" -pretty_assertions = "1.4.0" -quickcheck = "1.0.3" -quickcheck_macros = 
"1.0.0" +num = "0.4.3" +display-error-chain = "0.2.2" +bigdecimal = { version = "0.4.10", features = ["serde"] } +kdam = "0.6.4" +hashbrown = { version = "0.14.5", features = ["raw"] } +pretty_assertions = "1.4.1" streaming-stats = "0.2.3" -proptest = "1.4.0" -proptest-derive = "0.5.1" +proptest = "1.11.0" +proptest-derive = "0.6.0" criterion = "0.5.1" crossbeam-channel = "0.5.15" base64 = "0.22.1" jsonwebtoken = { version = "10.3.0", features = ["aws_lc_rs"] } spki = "0.7.3" -poem = { version = "3.0.1", features = ["compression", "embed", "static-files"] } -rust-embed = { version = "8.7.2", features = ["interpolate-folder-path"] } +poem = { version = "3.1.12", features = ["compression", "embed", "static-files"] } +rust-embed = { version = "8.11.0", features = ["interpolate-folder-path"] } opentelemetry = "0.27.1" opentelemetry_sdk = { version = "0.27.1", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.27.0" } -tracing = "0.1.37" +tracing = { version = "0.1.44", features = ["log"] } tracing-opentelemetry = "0.28.0" -tracing-subscriber = { version = "0.3.20", features = ["std", "env-filter"] } -indoc = "2.0.5" +tracing-subscriber = { version = "0.3.23", features = ["std", "env-filter"] } +indoc = "2.0.7" walkdir = "2" -config = "0.14.0" +config = "0.14.1" either = "=1.15.0" -clap = { version = "4.5.21", features = ["derive", "env"] } -memmap2 = { version = "0.9.4" } -ahash = { version = "0.8.3", features = ["serde"] } -bytemuck = { version = "1.18.0", features = ["derive"] } -ouroboros = "0.18.3" -url = "2.2" +clap = { version = "4.6.0", features = ["derive", "env"] } +memmap2 = { version = "0.9.10" } +ahash = { version = "0.8.12", features = ["serde"] } +bytemuck = { version = "1.25.0", features = ["derive"] } +ouroboros = "0.18.5" +url = "2.5" base64-compat = { package = "base64-compat", version = "1.0.0" } -prost = "0.13.1" -prost-types = "0.13.1" -prost-build = "0.13.1" -lazy_static = "1.4.0" -pest = "2.7.8" -pest_derive = "2.7.8" -minijinja = "2.2.0" -minijinja-contrib = { version = "2.2.0", features = ["datetime"] } -datafusion = { version = "50.0.0" } -lancedb = { version = "0.22.2", features = [] } # this is the latest and asks for chrono 0.4.41 +prost = "0.13.5" +prost-types = "0.13.5" +prost-build = "0.13.5" +lazy_static = "1.5.0" +pest = "2.8.6" +pest_derive = "2.8.6" +minijinja = "2.18.0" +minijinja-contrib = { version = "2.18.0", features = ["datetime"] } + +lancedb = { version = "0.27.2", features = [] } heed = "0.22.0" -sqlparser = "0.58.0" +sqlparser = "0.59.0" futures = "0.3" -arrow = { version = "56.2.0" } -parquet = { version = "56.2.0" } -arrow-json = { version = "56.2.0" } -arrow-buffer = { version = "56.2.0" } -arrow-schema = { version = "56.2.0" } -arrow-csv = { version = "56.2.0" } -arrow-array = { version = "56.2.0", features = ["chrono-tz"] } -arrow-cast = { version = "56.2.0" } -arrow-ipc = { version = "56.2.0" } -arrow-data = { version = "56.2.0" } -arrow-select = { version = "56.2.0" } -serde_arrow = { version = "0.13.0" } -moka = { version = "0.12.7", features = ["future"] } -indexmap = { version = "2.7.0", features = ["rayon"] } +arrow = { version = "57.3.0" } +parquet = { version = "57.3.0" } +arrow-json = { version = "57.3.0" } +arrow-buffer = { version = "57.3.0" } +arrow-schema = { version = "57.3.0" } +arrow-csv = { version = "57.3.0" } +arrow-array = { version = "57.3.0", features = ["chrono-tz"] } +arrow-cast = { version = "57.3.0" } +arrow-ipc = { version = "57.3.0" } +arrow-data = { version = "57.3.0" } +arrow-select = { version = 
"57.3.0" } +serde_arrow = { version = "0.13.7", features = ["arrow-57"] } +moka = { version = "0.12.15", features = ["future"] } +indexmap = { version = "2.13.0", features = ["rayon"] } fake = { version = "3.1.0", features = ["chrono"] } strsim = { version = "0.11.1" } -uuid = { version = "1.16.0", features = ["v4"] } +uuid = { version = "1.22.0", features = ["v4"] } +bitvec = "1.0.1" +sysinfo = "0.37.2" strum = "0.27.2" strum_macros = "0.27.2" -datafusion-physical-expr-common = "50.3.0" -datafusion-physical-expr = "50.3.0" -datafusion-physical-plan = "50.3.0" -datafusion-expr-common = "50.3.0" +pythonize = { version = "0.27.0" } +test-log = "0.2.19" +disjoint-sets = "0.4.2" +datafusion = { version = "52.5.0" } +datafusion-physical-expr-common = "52.5.0" +datafusion-physical-expr = "52.5.0" +datafusion-physical-plan = "52.5.0" +datafusion-expr-common = "52.5.0" + +[workspace.dependencies.storage] +package = "db4-storage" +path = "db4-storage" + +[workspace.dependencies.auth] +package = "raphtory-auth-noop" +path = "raphtory-auth-noop" diff --git a/Dockerfile b/Dockerfile index 21dd104bc4..736edc4261 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG RUST_VERSION=1.86.0 +ARG RUST_VERSION=1.89.0 ARG RAPHTORY_PROFILE="release" FROM rust:${RUST_VERSION} AS chef diff --git a/Makefile b/Makefile index f1170a349c..53b85fa83e 100644 --- a/Makefile +++ b/Makefile @@ -12,17 +12,13 @@ build-all: rust-build test-all: rust-test-all python-test -test-all-public: rust-test-all-public python-test-public - # Tidying tidy: rust-fmt build-python stubs python-fmt -tidy-public: rust-fmt build-python-public stubs python-fmt - python-tidy: stubs python-fmt test-graphql-schema -check-pr: tidy-public test-all +check-pr: tidy test-all gen-graphql-schema: raphtory schema > raphtory-graphql/schema.graphql @@ -31,7 +27,6 @@ test-graphql-schema: install-node-tools npx graphql-schema-linter --rules fields-have-descriptions,types-have-descriptions raphtory-graphql/schema.graphql # Utilities - activate-storage: ./scripts/activate_private_storage.py @@ -41,8 +36,10 @@ deactivate-storage: pull-storage: activate-storage git submodule update --init --recursive pometry-storage-private -pull-ui-tests: - git submodule update --init --recursive ui-tests +update-ui-tests: + rm -rf ui-tests + git clone git@github.com:Pometry/ui-tests.git ui-tests + rm -rf ui-tests/.git install-node-tools: @if command -v npx >/dev/null 2>&1; then \ @@ -71,13 +68,12 @@ run-graphql: rust-test: cargo test -q -rust-test-all: activate-storage - cargo nextest run --all --features=storage +rust-check: cargo hack check --workspace --all-targets --each-feature --skip extension-module,default -rust-test-all-public: +rust-test-all: rust-check cargo nextest run --all - cargo hack check --workspace --all-targets --each-feature --skip extension-module,default,storage + ########## # Python # @@ -92,32 +88,23 @@ clean: install-python: build-wheel pip install target/wheels/*.whl -build-python-public: deactivate-storage +build-python: cd python && maturin develop -r --extras=dev -build-python: activate-storage - cd python && maturin develop -r --features=storage,extension-module --extras=dev +debug-python: + cd python && maturin develop --profile=dev --extras=dev # Testing - -python-test: activate-storage - cd python && tox run && tox run -e storage - -python-test-public: +python-test: cd python && tox run python-fmt: cd python && black . 
-debug-python-public: deactivate-storage - cd python && maturin develop --profile=dev build-python-rtd: cd python && maturin build --profile=build-fast && pip install ../target/wheels/*.whl -debug-python: activate-storage - cd python && maturin develop --features=storage,extension-module --extras=dev - ######## # Docs # ######## diff --git a/clam-core b/clam-core new file mode 160000 index 0000000000..9f86d6e4a0 --- /dev/null +++ b/clam-core @@ -0,0 +1 @@ +Subproject commit 9f86d6e4a0a74b41bf71b316e34f614fb33609ce diff --git a/pometry-storage/Cargo.toml b/db4-graph/Cargo.toml similarity index 65% rename from pometry-storage/Cargo.toml rename to db4-graph/Cargo.toml index 1d7ae0a0ba..544912f0de 100644 --- a/pometry-storage/Cargo.toml +++ b/db4-graph/Cargo.toml @@ -1,16 +1,18 @@ [package] -name = "pometry-storage" -description = "Storage backend for Raphtory" -edition.workspace = true -rust-version.workspace = true +name = "db4-graph" version.workspace = true -keywords.workspace = true -authors.workspace = true documentation.workspace = true repository.workspace = true license.workspace = true readme.workspace = true homepage.workspace = true +keywords.workspace = true +authors.workspace = true +rust-version.workspace = true +edition.workspace = true -[features] -storage = [] +[dependencies] +storage.workspace = true +raphtory-api.workspace = true +raphtory-core.workspace = true +rayon.workspace = true diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs new file mode 100644 index 0000000000..4502f01feb --- /dev/null +++ b/db4-graph/src/lib.rs @@ -0,0 +1,465 @@ +use raphtory_api::core::{ + entities::{ + self, + properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, + GidType, + }, + input::input_node::InputNode, + storage::timeindex::TimeIndexOps, +}; +use raphtory_core::{ + entities::{graph::tgraph::InvalidLayer, nodes::node_ref::NodeRef, GidRef, LayerIds, EID, VID}, + storage::timeindex::EventTime, +}; +use rayon::prelude::*; +use std::{ + ops::Deref, + path::Path, + sync::{atomic::AtomicUsize, Arc}, +}; +use storage::{ + api::{ + edges::EdgeSegmentOps, + graph_props::GraphPropSegmentOps, + nodes::{LockedNSSegment, NodeRefOps, NodeSegmentOps}, + }, + dir::GraphDir, + error::StorageError, + pages::{ + layer_counter::GraphStats, + locked::{ + edges::WriteLockedEdgePages, graph_props::WriteLockedGraphPropPages, + nodes::WriteLockedNodePages, + }, + }, + persist::strategy::PersistenceStrategy, + resolver::GIDResolverOps, + transaction::TransactionManager, + Config, Extension, GIDResolver, Layer, LocalPOS, ReadLockedLayer, ES, GS, NS, +}; + +mod replay; + +#[derive(Debug)] +pub struct TemporalGraph +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + // mapping between logical and physical ids + pub logical_to_physical: Arc, + pub round_robin_counter: AtomicUsize, + storage: Arc>, + graph_dir: Option, + pub transaction_manager: Arc, +} + +impl Default for TemporalGraph { + fn default() -> Self { + let config = Config::default(); + let graph_dir = None; + Self::new(Extension::new(config, graph_dir).unwrap()).unwrap() + } +} + +impl TemporalGraph +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + pub fn new(ext: EXT) -> Result { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); + + Self::new_with_meta(None, node_meta, edge_meta, graph_props_meta, ext) + } + + 
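+    /// Like [`Self::new`], but roots the graph at `path`, so segments and the
+    /// GID resolver can be persisted under that directory (only when
+    /// `Extension::disk_storage_enabled()` is true; otherwise the path is
+    /// dropped, as in `new_with_meta` below).
+    ///
+    /// A minimal usage sketch, not a definitive recipe: it assumes a default
+    /// `Config`, and constructs the extension the same way `Default` does above.
+    ///
+    /// ```ignore
+    /// let ext = Extension::new(Config::default(), None)?;
+    /// let graph = TemporalGraph::new_at_path_with_ext("/tmp/graph", ext)?;
+    /// graph.flush()?;
+    /// ```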
pub fn new_at_path_with_ext(path: impl AsRef, ext: EXT) -> Result { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); + + Self::new_with_meta( + Some(path.as_ref().into()), + node_meta, + edge_meta, + graph_props_meta, + ext, + ) + } + + pub fn new_with_meta( + graph_dir: Option, + node_meta: Meta, + edge_meta: Meta, + graph_meta: Meta, + ext: EXT, + ) -> Result { + let mut graph_dir = graph_dir; + + // Short-circuit graph_dir to None if disk storage is not enabled + if !Extension::disk_storage_enabled() { + graph_dir = None; + } + + if let Some(dir) = graph_dir.as_ref() { + std::fs::create_dir_all(dir)? + } + + let id_type = node_meta + .metadata_mapper() + .d_types() + .first() + .and_then(|dtype| GidType::from_prop_type(dtype)); + + let gid_resolver_dir = graph_dir.as_ref().map(|dir| dir.gid_resolver_dir()); + let logical_to_physical = match gid_resolver_dir { + Some(gid_resolver_dir) => GIDResolver::new_with_path(gid_resolver_dir, id_type)?, + None => GIDResolver::new()?, + } + .into(); + + let storage: Layer = Layer::new_with_meta( + graph_dir.as_ref().map(|p| p.path()), + node_meta, + edge_meta, + graph_meta, + ext, + ); + + Ok(Self { + graph_dir, + logical_to_physical, + storage: Arc::new(storage), + transaction_manager: Arc::new(TransactionManager::new()), + round_robin_counter: AtomicUsize::new(0), + }) + } + + pub fn load(path: impl AsRef, ext: EXT) -> Result { + let path = path.as_ref(); + let storage = Layer::load(path, ext)?; + let id_type = storage.nodes().id_type(); + + let gid_resolver_dir = path.join("gid_resolver"); + let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; + + Ok(Self { + graph_dir: Some(path.into()), + round_robin_counter: AtomicUsize::new(0), + logical_to_physical: resolver.into(), + storage: Arc::new(storage), + transaction_manager: Arc::new(TransactionManager::new()), + }) + } + + pub fn flush(&self) -> Result<(), StorageError> { + self.storage.flush()?; + self.logical_to_physical.flush() + } + + pub fn disk_storage_path(&self) -> Option<&Path> { + self.graph_dir() + .filter(|_| Extension::disk_storage_enabled()) + } + + pub fn extension(&self) -> &EXT { + self.storage().extension() + } + + pub fn read_event_counter(&self) -> usize { + self.storage().read_event_id() + } + + pub fn storage(&self) -> &Arc> { + &self.storage + } + + pub fn num_layers(&self) -> usize { + self.storage.nodes().num_layers() - 1 + } + + #[inline] + pub fn resolve_node_ref(&self, node: NodeRef) -> Option { + let vid = match node { + NodeRef::Internal(vid) => Some(vid), + NodeRef::External(GidRef::U64(gid)) => self.logical_to_physical.get_u64(gid), + NodeRef::External(GidRef::Str(string)) => self + .logical_to_physical + .get_str(string) + .or_else(|| self.logical_to_physical.get_u64(string.id())), + }?; + + // VIDs in the resolver may not be initialised yet, need to double-check the node actually exists! 
+ let nodes = self.storage().nodes(); + let (page_id, pos) = nodes.resolve_pos(vid); + let node_page = nodes.get_segment(page_id)?; + + if pos.0 < node_page.num_nodes() { + Some(vid) + } else { + None + } + } + + #[inline] + pub fn internal_num_nodes(&self, layer_ids: &LayerIds) -> usize { + match layer_ids { + LayerIds::None => self + .storage + .nodes() + .segments_par_iter() + .map(|segment| { + let locked = segment.locked(); + locked + .iter_entries() + .filter(|entry| !entry.node_additions(STATIC_GRAPH_LAYER_ID).is_empty()) + .count() + }) + .sum(), + LayerIds::All => self.storage.nodes().num_nodes(), + LayerIds::One(id) => self + .storage + .nodes() + .segments_par_iter() + .map(|segment| { + let locked = segment.locked(); + locked + .iter_entries() + .filter(|entry| { + !entry.node_additions(STATIC_GRAPH_LAYER_ID).is_empty() + || entry.has_layer_inner(*id) + }) + .count() + }) + .sum(), + LayerIds::Multiple(ids) => { + // no fast path, need to count + self.storage + .nodes() + .segments_par_iter() + .map(|segment| { + let locked = segment.locked(); + locked + .iter_entries() + .filter(|entry| { + !entry.node_additions(STATIC_GRAPH_LAYER_ID).is_empty() + || ids.iter().any(|layer| entry.has_layer_inner(layer)) + }) + .count() + }) + .sum() + } + } + } + + #[inline] + pub fn internal_num_edges(&self, layer_ids: &LayerIds) -> usize { + match layer_ids { + LayerIds::None => 0, + LayerIds::All => self.storage.edges().num_edges_layer(STATIC_GRAPH_LAYER_ID), + LayerIds::One(id) => self.storage.edges().num_edges_layer(*id), + LayerIds::Multiple(ids) => { + // no fast path, need to count + self.storage + .edges() + .par_iter_segments() + .map(|segment| { + let head = segment.head(); + (0..segment.num_edges()) + .map(LocalPOS) + .filter(|pos| { + ids.iter() + .any(|layer| segment.has_edge(*pos, layer, head.deref())) + }) + .count() + }) + .sum() + } + } + } + + pub fn read_locked(self: &Arc) -> ReadLockedLayer { + self.storage.read_locked() + } + + pub fn edge_meta(&self) -> &Meta { + self.storage().edge_meta() + } + + pub fn node_meta(&self) -> &Meta { + self.storage().node_meta() + } + + pub fn graph_props_meta(&self) -> &Meta { + self.storage.graph_props_meta() + } + + pub fn graph_dir(&self) -> Option<&Path> { + self.graph_dir.as_ref().map(|p| p.path()) + } + + #[inline] + pub fn graph_earliest_time(&self) -> Option { + Some(self.storage().earliest()).filter(|t| *t != i64::MAX) + } + + #[inline] + pub fn graph_latest_time(&self) -> Option { + Some(self.storage().latest()).filter(|t| *t != i64::MIN) + } + + pub fn layer_ids(&self, key: entities::Layer) -> Result { + match key { + entities::Layer::None => Ok(LayerIds::None), + entities::Layer::All => Ok(LayerIds::All), + entities::Layer::Default => match self.edge_meta().get_default_layer_id() { + None => Ok(LayerIds::None), + Some(id) => Ok(LayerIds::One(id)), + }, + entities::Layer::One(id) => match self.edge_meta().get_layer_id(&id) { + Some(id) => Ok(LayerIds::One(id)), + None => Err(InvalidLayer::new( + id, + Self::get_valid_layers(self.edge_meta()), + )), + }, + entities::Layer::Multiple(ids) => { + let mut new_layers = ids + .iter() + .map(|id| { + self.edge_meta().get_layer_id(id).ok_or_else(|| { + InvalidLayer::new(id.clone(), Self::get_valid_layers(self.edge_meta())) + }) + }) + .collect::, InvalidLayer>>()?; + let num_layers = self.num_layers(); + let num_new_layers = new_layers.len(); + if num_new_layers == 0 { + Ok(LayerIds::None) + } else if num_new_layers == 1 { + Ok(LayerIds::One(new_layers[0])) + } else if num_new_layers == 
num_layers { + Ok(LayerIds::All) + } else { + new_layers.sort_unstable(); + new_layers.dedup(); + Ok(LayerIds::Multiple(new_layers.into())) + } + } + } + } + + fn get_valid_layers(edge_meta: &Meta) -> Vec { + edge_meta + .layer_meta() + .keys() + .iter() + .map(|x| x.to_string()) + .collect::>() + } + + pub fn valid_layer_ids(&self, key: entities::Layer) -> LayerIds { + match key { + entities::Layer::None => LayerIds::None, + entities::Layer::All => LayerIds::All, + entities::Layer::Default => match self.edge_meta().get_default_layer_id() { + None => LayerIds::None, + Some(id) => LayerIds::One(id), + }, + entities::Layer::One(id) => match self.edge_meta().get_layer_id(&id) { + Some(id) => LayerIds::One(id), + None => LayerIds::None, + }, + entities::Layer::Multiple(ids) => { + let mut new_layers = ids + .iter() + .flat_map(|id| self.edge_meta().get_layer_id(id)) + .collect::>(); + let num_layers = self.num_layers(); + let num_new_layers = new_layers.len(); + if num_new_layers == 0 { + LayerIds::None + } else if num_new_layers == 1 { + LayerIds::One(new_layers[0]) + } else if num_new_layers == num_layers { + LayerIds::All + } else { + new_layers.sort_unstable(); + new_layers.dedup(); + LayerIds::Multiple(new_layers.into()) + } + } + } + } + + pub fn write_locked_graph<'a>(&'a self) -> WriteLockedGraph<'a, EXT> { + WriteLockedGraph::new(self) + } + + pub fn update_time(&self, earliest: EventTime) { + // self.storage.update_time(earliest); + } +} + +/// Holds write locks across all segments in the graph for fast bulk ingestion. +pub struct WriteLockedGraph<'a, EXT> +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + pub nodes: WriteLockedNodePages<'a, storage::NS>, + pub edges: WriteLockedEdgePages<'a, storage::ES>, + pub graph_props: WriteLockedGraphPropPages<'a, storage::GS>, + pub graph: &'a TemporalGraph, +} + +impl<'a, EXT> WriteLockedGraph<'a, EXT> +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + pub fn new(graph: &'a TemporalGraph) -> Self { + WriteLockedGraph { + nodes: graph.storage.nodes().write_locked(), + edges: graph.storage.edges().write_locked(), + graph_props: graph.storage.graph_props().write_locked(), + graph, + } + } + + pub fn graph(&self) -> &TemporalGraph { + self.graph + } + + pub fn resize_segments_to_vid(&mut self, vid: VID) { + let (segment_id, _) = self.graph.storage.nodes().resolve_pos(vid); + self.graph.storage().nodes().grow(segment_id + 1); + std::mem::take(&mut self.nodes); + self.nodes = self.graph.storage.nodes().write_locked(); + } + + pub fn resize_segments_to_eid(&mut self, eid: EID) { + let (segment_id, _) = self.graph.storage.edges().resolve_pos(eid); + self.graph.storage().edges().grow(segment_id + 1); + std::mem::take(&mut self.edges); + self.edges = self.graph.storage.edges().write_locked(); + } + + pub fn edge_stats(&self) -> &Arc { + self.graph.storage().edges().stats() + } + + pub fn node_stats(&self) -> &Arc { + self.graph.storage().nodes().stats() + } +} diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs new file mode 100644 index 0000000000..fffe0dca6a --- /dev/null +++ b/db4-graph/src/replay.rs @@ -0,0 +1,678 @@ +//! Implements WAL replay for a `WriteLockedGraph`. +//! Allows for fast replay by making use of one-time lock acquisition for +//! all the segments in the graph. 
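+//!
+//! Replay is idempotent per segment: each segment records the LSN of the last
+//! entry already made durable (its `immut_lsn`), and a WAL record is applied
+//! only when its own LSN is strictly newer, so re-running recovery cannot
+//! double-apply edges, timestamps, or properties.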
+ +use crate::WriteLockedGraph; +use raphtory_api::core::{ + entities::{ + properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::Prop, + }, + LayerId, EID, GID, VID, + }, + storage::timeindex::EventTime, +}; +use storage::{ + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + persist::strategy::PersistenceStrategy, + resolver::GIDResolverOps, + wal::{GraphReplay, TransactionID, LSN}, + ES, GS, NS, +}; + +impl GraphReplay for WriteLockedGraph<'_, EXT> +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + fn replay_add_edge( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + t: EventTime, + src_name: Option, + src_id: VID, + dst_name: Option, + dst_id: VID, + eid: EID, + layer_name: Option, + layer_id: LayerId, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + // Insert node ids into resolver. + if let Some(src_name) = src_name.as_ref() { + self.graph() + .logical_to_physical + .set(src_name.as_ref(), src_id)?; + } + + if let Some(dst_name) = dst_name.as_ref() { + self.graph() + .logical_to_physical + .set(dst_name.as_ref(), dst_id)?; + } + + // Insert layer id into the layer meta of both edge and node. + self.graph() + .edge_meta() + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id.0); + self.graph() + .node_meta() + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id.0); + + // Grab src writer and add edge data. + let (src_segment_id, src_pos) = self.graph().storage().nodes().resolve_pos(src_id); + self.resize_segments_to_vid(src_id); // Create enough segments. + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(src_segment_id); + + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let src_writer = self.nodes.get_mut(src_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {src_segment_id} not found during replay_add_edge" + )) + })?; + + let mut src_writer = src_writer.writer(); + + // Increment the node counter for this segment if this is a new node. + if !src_writer.has_node(src_pos, STATIC_GRAPH_LAYER_ID) { + src_writer.increment_seg_num_nodes(); + } + + if let Some(src_name) = src_name { + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name); + } + + let is_new_edge_in_static = src_writer + .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + + let is_new_edge_in_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_in_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist, else just record the timestamp. + if is_new_edge_in_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } else { + src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); + } + + src_writer.set_lsn(lsn); + } + + // Grab dst writer and add edge data. + let (dst_segment_id, dst_pos) = self.graph().storage().nodes().resolve_pos(dst_id); + self.resize_segments_to_vid(dst_id); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(dst_segment_id); + + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. 
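+        // (Records with lsn <= immut_lsn are already in the immutable store;
+        // skipping them here is what keeps replay safe to re-run.)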
+ if immut_lsn < lsn { + let dst_writer = self.nodes.get_mut(dst_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {dst_segment_id} not found during replay_add_edge" + )) + })?; + + let mut dst_writer = dst_writer.writer(); + + // Increment the node counter for this segment if this is a new node. + if !dst_writer.has_node(dst_pos, STATIC_GRAPH_LAYER_ID) { + dst_writer.increment_seg_num_nodes(); + } + + if let Some(dst_name) = dst_name { + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name); + } + + let is_new_edge_in_static = dst_writer + .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + + let is_new_edge_in_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); + + if is_new_edge_in_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } + + if is_new_edge_in_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } else { + dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); + } + + dst_writer.set_lsn(lsn); + } + + // Grab edge writer and add temporal props. + let (edge_segment_id, edge_pos) = self.graph().storage().edges().resolve_pos(eid); + self.resize_segments_to_eid(eid); + + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); + + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let edge_meta = self.graph().edge_meta(); + + // Insert prop ids into edge meta. + unify_types(edge_meta, &props, true)?; + + let edge_writer = self.edges.get_mut(edge_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Edge segment {edge_segment_id} not found during replay_add_edge" + )) + })?; + + let mut edge_writer = edge_writer.writer(); + + let is_new_edge_in_static = edge_writer + .get_edge(STATIC_GRAPH_LAYER_ID, edge_pos) + .is_none(); + + // Add edge into the static graph if it doesn't already exist. + if is_new_edge_in_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + + // Add edge into the specified layer with timestamp and props. 
+ edge_writer.add_edge( + t, + edge_pos, + src_id, + dst_id, + props + .into_iter() + .map(|(_, prop_id, prop_value)| (prop_id, prop_value)), + layer_id, + ); + + edge_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_add_edge_metadata( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + eid: EID, + layer_id: LayerId, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let (edge_segment_id, edge_pos) = self.graph().storage().edges().resolve_pos(eid); + self.resize_segments_to_eid(eid); + + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let edge_meta = self.graph().edge_meta(); + + unify_types(edge_meta, &props, false)?; + + let edge_writer = self.edges.get_mut(edge_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Edge segment {edge_segment_id} not found during replay_add_edge_metadata" + )) + })?; + + let mut edge_writer = edge_writer.writer(); + + let (src, dst) = edge_writer.get_edge(layer_id, edge_pos).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Edge {eid:?} not found in layer {layer_id} during replay_add_edge_metadata" + )) + })?; + + let props = props.into_iter().map(|(_, id, p)| (id, p)); + + // No need to check metadata since the operation was logged after validation. + edge_writer.update_c_props(edge_pos, src, dst, layer_id, props); + edge_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_delete_edge( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + t: EventTime, + src_name: Option, + src_id: VID, + dst_name: Option, + dst_id: VID, + eid: EID, + layer_name: Option, + layer_id: LayerId, + ) -> Result<(), StorageError> { + // Insert node ids into resolver. + if let Some(src_name) = src_name.as_ref() { + self.graph() + .logical_to_physical + .set(src_name.as_ref(), src_id)?; + } + + if let Some(dst_name) = dst_name.as_ref() { + self.graph() + .logical_to_physical + .set(dst_name.as_ref(), dst_id)?; + } + + // Insert layer id into the layer meta of both edge and node. + self.graph() + .edge_meta() + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id.0); + self.graph() + .node_meta() + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id.0); + + // Grab src writer and record deletion time. + let (src_segment_id, src_pos) = self.graph().storage().nodes().resolve_pos(src_id); + self.resize_segments_to_vid(src_id); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(src_segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let src_writer = self.nodes.get_mut(src_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {src_segment_id} not found during replay_delete_edge" + )) + })?; + + let mut src_writer = src_writer.writer(); + + // Increment the node counter for this segment if this is a new node. + if !src_writer.has_node(src_pos, STATIC_GRAPH_LAYER_ID) { + src_writer.increment_seg_num_nodes(); + } + + if let Some(src_name) = src_name { + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name); + } + + let is_new_edge_in_static = src_writer + .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + let is_new_edge_in_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. 
+ if is_new_edge_in_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist. + if is_new_edge_in_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } + + src_writer.update_deletion_time(t, src_pos, eid.with_layer(layer_id)); + src_writer.set_lsn(lsn); + } + + // Grab dst writer and record deletion time. + let (dst_segment_id, dst_pos) = self.graph().storage().nodes().resolve_pos(dst_id); + self.resize_segments_to_vid(dst_id); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(dst_segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let dst_writer = self.nodes.get_mut(dst_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {dst_segment_id} not found during replay_delete_edge" + )) + })?; + + let mut dst_writer = dst_writer.writer(); + + // Increment the node counter for this segment if this is a new node. + if !dst_writer.has_node(dst_pos, STATIC_GRAPH_LAYER_ID) { + dst_writer.increment_seg_num_nodes(); + } + + if let Some(dst_name) = dst_name { + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name); + } + + let is_new_edge_in_static = dst_writer + .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + + let is_new_edge_in_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_in_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } + + // Add the edge to the layer if it doesn't already exist. + if is_new_edge_in_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } + + // Always update the deletion time on the edge. + dst_writer.update_deletion_time(t, dst_pos, eid.with_layer(layer_id)); + + dst_writer.set_lsn(lsn); + } + + // Grab edge writer and delete the edge at (t, layer_id). + let (edge_segment_id, edge_pos) = self.graph().storage().edges().resolve_pos(eid); + self.resize_segments_to_eid(eid); + + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let edge_writer = self.edges.get_mut(edge_segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Edge segment {edge_segment_id} not found during replay_delete_edge" + )) + })?; + + let mut edge_writer = edge_writer.writer(); + + let is_new_edge_in_static = edge_writer + .get_edge(STATIC_GRAPH_LAYER_ID, edge_pos) + .is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_in_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + + // Delete the edge from the layer at the specified timestamp. + edge_writer.delete_edge(t, edge_pos, src_id, dst_id, layer_id); + + edge_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_add_node( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + t: EventTime, + node_name: Option, + node_id: VID, + node_type_and_id: Option<(String, usize)>, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + // Insert node id into resolver. + if let Some(ref name) = node_name { + self.graph() + .logical_to_physical + .set(name.as_ref(), node_id)?; + } + + // Resolve segment and check LSN. 
+ let (segment_id, pos) = self.graph().storage().nodes().resolve_pos(node_id); + self.resize_segments_to_vid(node_id); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(segment_id); + + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let node_meta = self.graph().node_meta(); + + unify_types(node_meta, &props, true)?; + + // Set node type metadata early to prevent issues with borrowing node_writer. + if let Some((ref node_type, node_type_id)) = node_type_and_id { + node_meta + .node_type_meta() + .set_id(node_type.as_str(), node_type_id); + } + + let node_writer = self.nodes.get_mut(segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {segment_id} not found during replay_add_node" + )) + })?; + + let mut node_writer = node_writer.writer(); + + if !node_writer.has_node(pos, STATIC_GRAPH_LAYER_ID) { + node_writer.increment_seg_num_nodes(); + } + + if let Some(name) = node_name { + node_writer.store_node_id(pos, STATIC_GRAPH_LAYER_ID, name); + } + + if let Some((_, node_type_id)) = node_type_and_id { + node_writer.store_node_type(pos, STATIC_GRAPH_LAYER_ID, node_type_id); + } + + // Add the node with its timestamp and props. + node_writer.add_props( + t, + pos, + STATIC_GRAPH_LAYER_ID, + props + .into_iter() + .map(|(_, prop_id, prop_value)| (prop_id, prop_value)), + ); + + node_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_add_node_metadata( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + vid: VID, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let (segment_id, pos) = self.graph().storage().nodes().resolve_pos(vid); + self.resize_segments_to_vid(vid); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let node_meta = self.graph().node_meta(); + + unify_types(&node_meta, &props, false)?; + + let node_writer = self.nodes.get_mut(segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {segment_id} not found during replay_add_node_metadata" + )) + })?; + + let mut node_writer = node_writer.writer(); + let props = props.into_iter().map(|(_, id, p)| (id, p)); + + // No need to check metadata since the operation was logged after validation. 
+ node_writer.update_c_props(pos, STATIC_GRAPH_LAYER_ID, props); + node_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_set_node_type( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + vid: VID, + node_type: String, + node_type_id: usize, + ) -> Result<(), StorageError> { + let (segment_id, pos) = self.graph().storage().nodes().resolve_pos(vid); + self.resize_segments_to_vid(vid); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(segment_id); + + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let node_meta = self.graph().node_meta(); + + node_meta + .node_type_meta() + .set_id(node_type.as_str(), node_type_id); + + let node_writer = self.nodes.get_mut(segment_id).ok_or_else(|| { + StorageError::GenericFailure(format!( + "Node segment {segment_id} not found during replay_set_node_type" + )) + })?; + let mut node_writer = node_writer.writer(); + + node_writer.store_node_type(pos, STATIC_GRAPH_LAYER_ID, node_type_id); + node_writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_add_graph_props( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + t: EventTime, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let segment = self.graph().storage().graph_props().segment(); + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let graph_props_meta = self.graph().graph_props_meta(); + + unify_types(graph_props_meta, &props, true)?; + + let writer = self.graph_props.writer(); + let props = props.into_iter().map(|(_, id, p)| (id, p)); + + writer.add_properties(t, props); + writer.set_lsn(lsn); + } + + Ok(()) + } + + fn replay_add_graph_metadata( + &mut self, + lsn: LSN, + _transaction_id: TransactionID, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let segment = self.graph().storage().graph_props().segment(); + let immut_lsn = segment.immut_lsn(); + + if immut_lsn < lsn { + let graph_props_meta = self.graph().graph_props_meta(); + + unify_types(graph_props_meta, &props, false)?; + + let writer = self.graph_props.writer(); + let props = props.into_iter().map(|(_, id, p)| (id, p)); + + writer.update_metadata(props); + writer.set_lsn(lsn); + } + + Ok(()) + } +} + +fn unify_types( + meta: &Meta, + props: &[(String, usize, Prop)], + temporal: bool, +) -> Result<(), StorageError> { + let prop_mapper = if !temporal { + meta.metadata_mapper() + } else { + meta.temporal_prop_mapper() + }; + let mut write_locked_mapper = prop_mapper.write_locked(); + for (prop_name, prop_id, prop_value) in props { + write_locked_mapper.set_or_unify_id_and_dtype( + prop_name.as_ref(), + *prop_id, + prop_value.dtype(), + )?; + } + Ok(()) +} diff --git a/db4-storage/Cargo.toml b/db4-storage/Cargo.toml new file mode 100644 index 0000000000..cf0d0b68c0 --- /dev/null +++ b/db4-storage/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "db4-storage" +version.workspace = true +documentation.workspace = true +repository.workspace = true +readme.workspace = true +homepage.workspace = true +keywords.workspace = true +authors.workspace = true +rust-version.workspace = true +edition = "2024" + +[dependencies] +raphtory-api.workspace = true +raphtory-api-macros.workspace = true +raphtory-core = { workspace = true } +bigdecimal.workspace = true +either.workspace = true +parking_lot.workspace = true +serde.workspace = true +boxcar.workspace = true +serde_json.workspace = true +arrow-array.workspace = true +arrow-schema.workspace = true +parquet.workspace = true +rayon.workspace = true +itertools.workspace = 
true +thiserror.workspace = true +tinyvec.workspace = true +proptest = { workspace = true, optional = true } +tempfile = { workspace = true } +iter-enum = { workspace = true, features = ["rayon"] } +chrono = { workspace = true, optional = true } +clap.workspace = true +tracing.workspace = true +dashmap.workspace = true +lock_api.workspace = true +once_cell.workspace = true + +[dev-dependencies] +proptest.workspace = true +tempfile.workspace = true +chrono.workspace = true +rayon.workspace = true +test-log.workspace = true + +[features] +test-utils = ["dep:proptest", "dep:chrono"] +default = ["test-utils"] diff --git a/db4-storage/build.rs b/db4-storage/build.rs new file mode 100644 index 0000000000..7acbc3f99d --- /dev/null +++ b/db4-storage/build.rs @@ -0,0 +1,9 @@ +use std::io::Result; + +fn main() -> Result<()> { + println!("cargo::rustc-check-cfg=cfg(has_debug_symbols)"); + if let Ok("true" | "1" | "2") = std::env::var("DEBUG").as_deref() { + println!("cargo::rustc-cfg=has_debug_symbols"); + } + Ok(()) +} diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs new file mode 100644 index 0000000000..74082bc59b --- /dev/null +++ b/db4-storage/src/api/edges.rs @@ -0,0 +1,180 @@ +use crate::{ + LocalPOS, error::StorageError, persist::strategy::PersistenceStrategy, + segments::edge::segment::MemEdgeSegment, wal::LSN, +}; +use parking_lot::{RwLockReadGuard, RwLockWriteGuard, lock_api::ArcRwLockReadGuard}; +use raphtory_api::core::entities::{ + LayerId, + properties::{meta::Meta, prop::Prop, tprop::TPropOps}, +}; +use raphtory_core::{ + entities::{EID, LayerIds, VID}, + storage::timeindex::{EventTime, TimeIndexOps}, +}; +use rayon::iter::ParallelIterator; +use std::{ + ops::{Deref, DerefMut}, + path::{Path, PathBuf}, + sync::{Arc, atomic::AtomicU32}, +}; + +pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { + type Extension: PersistenceStrategy; + + type Entry<'a>: EdgeEntryOps<'a> + where + Self: 'a; + + type ArcLockedSegment: LockedESegment; + + fn extension(&self) -> &Self::Extension; + + fn latest(&self) -> Option; + fn earliest(&self) -> Option; + + fn t_len(&self) -> usize; + fn num_layers(&self) -> usize; + // Persistent layer count, not used for up-to-date counts + fn layer_count(&self, layer_id: LayerId) -> u32; + + fn load( + page_id: usize, + max_page_len: u32, + meta: Arc, + path: impl AsRef, + ext: Self::Extension, + ) -> Result + where + Self: Sized; + + fn new(page_id: usize, meta: Arc, path: Option, ext: Self::Extension) -> Self; + + fn segment_id(&self) -> usize; + + fn edges_counter(&self) -> &AtomicU32; + + fn num_edges(&self) -> u32 { + self.edges_counter() + .load(std::sync::atomic::Ordering::Relaxed) + } + + fn head(&self) -> RwLockReadGuard<'_, MemEdgeSegment>; + + fn head_arc(&self) -> ArcRwLockReadGuard; + + fn head_mut(&self) -> RwLockWriteGuard<'_, MemEdgeSegment>; + + fn try_head_mut(&self) -> Option>; + + fn set_dirty(&self, dirty: bool); + + fn is_dirty(&self) -> bool; + + /// notify that an edge was added (might need to write to disk) + fn notify_write( + &self, + head_lock: impl DerefMut, + ) -> Result<(), StorageError>; + + fn increment_num_edges(&self) -> u32 { + self.edges_counter() + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + } + + fn has_edge( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: impl Deref, + ) -> bool; + + fn immut_has_edge(&self, edge_pos: LocalPOS, layer_id: LayerId) -> bool; + + fn get_edge( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: impl Deref, + ) -> 
Option<(VID, VID)>; + + fn entry<'a>(&'a self, edge_pos: LocalPOS) -> Self::Entry<'a>; + + fn layer_entry<'a>( + &'a self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: Option>, + ) -> Option>; + + fn locked(self: &Arc) -> Self::ArcLockedSegment; + + fn vacuum( + &self, + locked_head: impl DerefMut, + ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. + fn immut_lsn(&self) -> LSN; + + fn flush(&self) -> Result<(), StorageError>; +} + +pub trait LockedESegment: Send + Sync + std::fmt::Debug { + type EntryRef<'a>: EdgeRefOps<'a> + where + Self: 'a; + + fn entry_ref<'a>(&'a self, edge_pos: impl Into) -> Self::EntryRef<'a> + where + Self: 'a; + + fn edge_iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl Iterator> + Send + Sync + 'a; + + fn edge_par_iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl ParallelIterator> + Sync + 'a; + + fn num_edges(&self) -> u32; +} + +pub trait EdgeEntryOps<'a>: Send + Sync { + type Ref<'b>: EdgeRefOps<'b> + where + 'a: 'b, + Self: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b; +} + +pub trait EdgeRefOps<'a>: Copy + Clone + Send + Sync { + type Additions: TimeIndexOps<'a, IndexType = EventTime>; + type Deletions: TimeIndexOps<'a, IndexType = EventTime>; + type TProps: TPropOps<'a>; + + fn edge(self, layer_id: LayerId) -> Option<(VID, VID)>; + + fn has_layer_inner(self, layer_id: LayerId) -> bool { + self.edge(layer_id).is_some() + } + + fn internal_num_layers(self) -> usize; + + fn layer_additions(self, layer_id: LayerId) -> Self::Additions; + fn layer_deletions(self, layer_id: LayerId) -> Self::Deletions; + + fn c_prop(self, layer_id: LayerId, prop_id: usize) -> Option; + + fn layer_t_prop(self, layer_id: LayerId, prop_id: usize) -> Self::TProps; + + fn src(&self) -> Option; + + fn dst(&self) -> Option; + + fn edge_id(&self) -> EID; +} diff --git a/db4-storage/src/api/graph_props.rs b/db4-storage/src/api/graph_props.rs new file mode 100644 index 0000000000..824510a4c6 --- /dev/null +++ b/db4-storage/src/api/graph_props.rs @@ -0,0 +1,65 @@ +use crate::{error::StorageError, segments::graph_prop::segment::MemGraphPropSegment, wal::LSN}; +use parking_lot::{RwLockReadGuard, RwLockWriteGuard}; +use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop, tprop::TPropOps}; +use std::{fmt::Debug, path::Path, sync::Arc}; + +pub trait GraphPropSegmentOps: Send + Sync + Debug + 'static +where + Self: Sized, +{ + type Extension; + + type Entry<'a>: GraphPropEntryOps<'a>; + + fn new(meta: Arc, path: Option<&Path>, ext: Self::Extension) -> Self; + + fn load( + meta: Arc, + path: impl AsRef, + ext: Self::Extension, + ) -> Result; + + fn head(&self) -> RwLockReadGuard<'_, MemGraphPropSegment>; + + fn head_mut(&self) -> RwLockWriteGuard<'_, MemGraphPropSegment>; + + fn entry(&self) -> Self::Entry<'_>; + + fn increment_est_size(&self, size: usize); + + fn est_size(&self) -> usize; + + fn set_dirty(&self, dirty: bool); + + /// Returns the latest lsn for the immutable part of this segment. + fn immut_lsn(&self) -> LSN; + + fn notify_write( + &self, + mem_segment: &mut RwLockWriteGuard<'_, MemGraphPropSegment>, + ) -> Result<(), StorageError>; + + fn flush(&self) -> Result<(), StorageError>; +} + +/// Trait for returning a guard-free, copyable reference to graph properties +/// and metadata. 
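+///
+/// A minimal usage sketch, assuming `segment` is some `GraphPropSegmentOps`
+/// implementation and `prop_id` a resolved property id (names illustrative):
+///
+/// ```ignore
+/// let entry = segment.entry();   // may hold a lock guard
+/// let props = entry.as_ref();    // guard-free, `Copy` reference
+/// let t_prop = props.get_temporal_prop(prop_id);
+/// let metadata = props.get_metadata(prop_id);
+/// ```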
+pub trait GraphPropEntryOps<'a>: Send + Sync + 'a { + type Ref<'b>: GraphPropRefOps<'b> + where + 'a: 'b, + Self: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b; +} + +/// Methods for reading graph properties and metadata from a reference on storage. +pub trait GraphPropRefOps<'a>: Copy + Clone + Send + Sync + 'a { + type TProps: TPropOps<'a>; + + fn get_temporal_prop(self, prop_id: usize) -> Self::TProps; + + fn get_metadata(self, prop_id: usize) -> Option; +} diff --git a/db4-storage/src/api/mod.rs b/db4-storage/src/api/mod.rs new file mode 100644 index 0000000000..de88345004 --- /dev/null +++ b/db4-storage/src/api/mod.rs @@ -0,0 +1,3 @@ +pub mod edges; +pub mod graph_props; +pub mod nodes; diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs new file mode 100644 index 0000000000..d59e7447ec --- /dev/null +++ b/db4-storage/src/api/nodes.rs @@ -0,0 +1,390 @@ +use itertools::Itertools; +use parking_lot::{RwLockReadGuard, RwLockWriteGuard, lock_api::ArcRwLockReadGuard}; +use raphtory_api::{ + core::{ + Direction, + entities::properties::{ + meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX}, + prop::{Prop, PropUnwrap}, + tprop::TPropOps, + }, + }, + iter::IntoDynBoxed, +}; +use raphtory_api_macros::box_on_debug_lifetime; +use raphtory_core::{ + entities::{EID, GidRef, LayerIds, VID, edges::edge_ref::EdgeRef}, + storage::timeindex::{EventTime, TimeIndexOps}, + utils::iter::GenLockedIter, +}; +use std::{ + borrow::Cow, + fmt::Debug, + ops::{Deref, DerefMut, Range}, + path::{Path, PathBuf}, + sync::{ + Arc, + atomic::{AtomicU32, Ordering}, + }, +}; + +use crate::{ + LocalPOS, + error::StorageError, + gen_ts::LayerIter, + pages::node_store::increment_and_clamp, + segments::node::segment::MemNodeSegment, + utils::{Iter2, Iter3, Iter4}, + wal::LSN, +}; +use raphtory_api::core::entities::{LayerId, properties::meta::STATIC_GRAPH_LAYER_ID}; +use rayon::prelude::*; + +pub trait NodeSegmentOps: Send + Sync + Debug + 'static { + type Extension; + + type Entry<'a>: NodeEntryOps<'a> + where + Self: 'a; + + type ArcLockedSegment: LockedNSSegment; + + fn latest(&self) -> Option; + + fn earliest(&self) -> Option; + + fn t_len(&self) -> usize; + + fn load( + page_id: usize, + node_meta: Arc, + edge_meta: Arc, + path: impl AsRef, + ext: Self::Extension, + ) -> Result + where + Self: Sized; + + fn new( + page_id: usize, + node_meta: Arc, + edge_meta: Arc, + path: Option, + ext: Self::Extension, + ) -> Self; + + fn segment_id(&self) -> usize; + + fn head_arc(&self) -> ArcRwLockReadGuard; + + fn head(&self) -> RwLockReadGuard<'_, MemNodeSegment>; + + fn head_mut(&self) -> RwLockWriteGuard<'_, MemNodeSegment>; + + fn try_head_mut(&self) -> Option>; + + fn notify_write( + &self, + head_lock: impl DerefMut, + ) -> Result<(), StorageError>; + + fn set_dirty(&self, dirty: bool); + + fn has_node(&self, pos: LocalPOS, layer_id: LayerId) -> bool; + + fn get_out_edge( + &self, + pos: LocalPOS, + dst: impl Into, + layer_id: LayerId, + locked_head: impl Deref, + ) -> Option; + + fn get_inb_edge( + &self, + pos: LocalPOS, + src: impl Into, + layer_id: LayerId, + locked_head: impl Deref, + ) -> Option; + + fn entry(&self, pos: impl Into) -> Self::Entry<'_>; + + fn locked(&self) -> Self::ArcLockedSegment; + + fn flush(&self) -> Result<(), StorageError>; + + fn is_dirty(&self) -> bool; + + fn vacuum( + &self, + locked_head: impl DerefMut, + ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. 
+ fn immut_lsn(&self) -> LSN; + + fn nodes_counter(&self) -> &AtomicU32; + + fn increment_num_nodes(&self, max_page_len: u32) { + increment_and_clamp(self.nodes_counter(), 1, max_page_len); + } + + fn num_nodes(&self) -> u32 { + self.nodes_counter().load(Ordering::Relaxed) + } + + fn num_layers(&self) -> usize; + + fn layer_count(&self, layer_id: LayerId) -> u32; +} + +pub trait LockedNSSegment: Debug + Send + Sync { + type EntryRef<'a>: NodeRefOps<'a> + where + Self: 'a; + + fn num_nodes(&self) -> u32; + + fn entry_ref<'a>(&'a self, pos: impl Into) -> Self::EntryRef<'a>; + + fn iter_entries<'a>(&'a self) -> impl Iterator> + Send + Sync + 'a { + let num_nodes = self.num_nodes(); + (0..num_nodes).map(move |vid| self.entry_ref(LocalPOS(vid))) + } + + fn par_iter_entries<'a>( + &'a self, + ) -> impl ParallelIterator> + Sync + 'a { + let num_nodes = self.num_nodes(); + (0..num_nodes) + .into_par_iter() + .map(move |vid| self.entry_ref(LocalPOS(vid))) + } +} + +pub trait NodeEntryOps<'a>: Send + Sync + 'a { + type Ref<'b>: NodeRefOps<'b> + where + 'a: 'b, + Self: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b; + + fn into_edges<'b: 'a>( + self, + layers: &'b LayerIds, + dir: Direction, + ) -> impl Iterator + Send + Sync + 'a + where + Self: Sized, + { + GenLockedIter::from((self, layers), |(e, layers)| { + e.as_ref().edges_iter(layers, dir).into_dyn_boxed() + }) + } +} + +pub trait NodeRefOps<'a>: Copy + Clone + Send + Sync + 'a { + type Additions: TimeIndexOps<'a, IndexType = EventTime>; + type EdgeAdditions: TimeIndexOps<'a, IndexType = EventTime>; + type TProps: TPropOps<'a>; + + fn out_edges(self, layer_id: LayerId) -> impl Iterator + Send + Sync + 'a; + + fn inb_edges(self, layer_id: LayerId) -> impl Iterator + Send + Sync + 'a; + + fn out_edges_sorted( + self, + layer_id: LayerId, + ) -> impl Iterator + Send + Sync + 'a; + + fn inb_edges_sorted( + self, + layer_id: LayerId, + ) -> impl Iterator + Send + Sync + 'a; + + fn vid(&self) -> VID; + + #[box_on_debug_lifetime] + fn edges_dir( + self, + layer_id: LayerId, + dir: Direction, + ) -> impl Iterator + Send + Sync + 'a + where + Self: Sized, + { + let src_pid = self.vid(); + match dir { + Direction::OUT => Iter3::I( + self.out_edges(layer_id) + .map(move |(v, e)| EdgeRef::new_outgoing(e, src_pid, v)), + ), + Direction::IN => Iter3::J( + self.inb_edges(layer_id) + .map(move |(v, e)| EdgeRef::new_incoming(e, v, src_pid)), + ), + Direction::BOTH => Iter3::K( + self.out_edges_sorted(layer_id) + .map(move |(v, e)| EdgeRef::new_outgoing(e, src_pid, v)) + .merge_by( + self.inb_edges_sorted(layer_id) + .map(move |(v, e)| EdgeRef::new_incoming(e, v, src_pid)), + |e1, e2| e1.remote() < e2.remote(), + ) + .dedup_by(|l, r| l.pid() == r.pid()), + ), + } + } + + #[box_on_debug_lifetime] + fn edges_iter<'b>( + self, + layers_ids: &'b LayerIds, + dir: Direction, + ) -> impl Iterator + Send + Sync + 'a + where + Self: Sized, + { + match layers_ids { + LayerIds::One(layer_id) => Iter4::I(self.edges_dir(*layer_id, dir)), + LayerIds::All => Iter4::J(self.edges_dir(STATIC_GRAPH_LAYER_ID, dir)), + LayerIds::Multiple(layers) => Iter4::K( + layers + .into_iter() + .map(|layer_id| self.edges_dir(layer_id, dir)) + .kmerge_by(|e1, e2| e1.remote() < e2.remote()) + .dedup_by(|l, r| l.pid() == r.pid()), + ), + LayerIds::None => Iter4::L(std::iter::empty()), + } + } + + fn node_meta(&self) -> &Arc; + + fn temp_prop_rows( + self, + w: Option>, + ) -> impl Iterator)> + 'a { + (0..self.internal_num_layers()).flat_map(move |layer_id| { + let w = 
w.clone(); + let additions = self.node_additions(layer_id); + let additions = w + .clone() + .map(|w| Iter2::I1(additions.range(w).iter())) + .unwrap_or_else(|| Iter2::I2(additions.iter())); + + let mut time_ordered_iter = self + .node_meta() + .temporal_prop_mapper() + .ids() + .map(move |prop_id| { + self.temporal_prop_layer(LayerId(layer_id), prop_id) + .iter_inner(w.clone()) + .map(move |(t, prop)| (t, (prop_id, prop))) + }) + .kmerge_by(|(t1, (p_id1, _)), (t2, (p_id2, _))| (t1, p_id1) < (t2, p_id2)) + .merge_join_by(additions, |(t1, _), t2| t1 <= t2) + .map(move |result| match result { + either::Either::Left((l, (prop_id, prop))) => (l, Some((prop_id, prop))), + either::Either::Right(r) => (r, None), + }); + + let mut done = false; + if let Some((mut current_time, maybe_prop)) = time_ordered_iter.next() { + let mut current_row = Vec::from_iter(maybe_prop); + Iter2::I2(std::iter::from_fn(move || { + if done { + return None; + } + for (t, maybe_prop) in time_ordered_iter.by_ref() { + if t == current_time { + current_row.extend(maybe_prop); + } else { + let mut row = std::mem::take(&mut current_row); + row.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + let out = Some((current_time, layer_id, row)); + current_row.extend(maybe_prop); + current_time = t; + return out; + } + } + done = true; + let row = std::mem::take(&mut current_row); + Some((current_time, layer_id, row)) + })) + } else { + Iter2::I1(std::iter::empty()) + } + }) + } + + fn out_nbrs(self, layer_id: LayerId) -> impl Iterator + 'a + where + Self: Sized, + { + self.out_edges(layer_id).map(|(v, _)| v) + } + + fn inb_nbrs(self, layer_id: LayerId) -> impl Iterator + 'a + where + Self: Sized, + { + self.inb_edges(layer_id).map(|(v, _)| v) + } + + fn out_nbrs_sorted(self, layer_id: LayerId) -> impl Iterator + 'a + where + Self: Sized, + { + self.out_edges_sorted(layer_id).map(|(v, _)| v) + } + + fn inb_nbrs_sorted(self, layer_id: LayerId) -> impl Iterator + 'a + where + Self: Sized, + { + self.inb_edges_sorted(layer_id).map(|(v, _)| v) + } + + fn edge_additions>>(self, layer_id: L) -> Self::EdgeAdditions; + + fn node_additions>>(self, layer_id: L) -> Self::Additions; + + fn c_prop(self, layer_id: LayerId, prop_id: usize) -> Option; + + fn c_prop_str(self, layer_id: LayerId, prop_id: usize) -> Option<&'a str>; + + fn temporal_prop_layer(self, layer_id: LayerId, prop_id: usize) -> Self::TProps; + + fn degree(self, layers: &LayerIds, dir: Direction) -> usize; + + fn find_edge(&self, dst: VID, layers: &LayerIds) -> Option; + + fn name(&self) -> Cow<'a, str> { + self.gid().to_str() + } + + fn gid(&self) -> GidRef<'a> { + self.c_prop_str(LayerId(0), NODE_ID_IDX) + .map(GidRef::Str) + .or_else(|| { + self.c_prop(LayerId(0), NODE_ID_IDX) + .and_then(|prop| prop.into_u64().map(GidRef::U64)) + }) + .unwrap_or_else(|| panic!("GID should be present, for node {:?}", self.vid())) + } + + fn node_type_id(&self) -> usize { + self.c_prop(LayerId(0), NODE_TYPE_IDX) + .and_then(|prop| prop.into_u64()) + .map_or(0, |id| id as usize) + } + + fn internal_num_layers(&self) -> usize; + + fn has_layer_inner(self, layer_id: LayerId) -> bool; +} diff --git a/db4-storage/src/dir/mod.rs b/db4-storage/src/dir/mod.rs new file mode 100644 index 0000000000..1d7162c92f --- /dev/null +++ b/db4-storage/src/dir/mod.rs @@ -0,0 +1,47 @@ +use std::{ + io, + path::{Path, PathBuf}, +}; +use tempfile::TempDir; + +#[derive(Debug)] +pub enum GraphDir { + Temp(TempDir), + Path(PathBuf), +} + +impl GraphDir { + pub fn path(&self) -> &Path { + match self { + GraphDir::Temp(dir) 
=> dir.path(), + GraphDir::Path(path) => path, + } + } + pub fn gid_resolver_dir(&self) -> PathBuf { + self.path().join("gid_resolver") + } + + pub fn wal_dir(&self) -> PathBuf { + self.path().join("wal") + } + + pub fn create_dir(&self) -> Result<(), io::Error> { + if let GraphDir::Path(path) = self { + std::fs::create_dir_all(path)?; + } + + Ok(()) + } +} + +impl AsRef for GraphDir { + fn as_ref(&self) -> &Path { + self.path() + } +} + +impl<'a> From<&'a Path> for GraphDir { + fn from(path: &'a Path) -> Self { + GraphDir::Path(path.to_path_buf()) + } +} diff --git a/db4-storage/src/gen_ts.rs b/db4-storage/src/gen_ts.rs new file mode 100644 index 0000000000..61b04bb6a2 --- /dev/null +++ b/db4-storage/src/gen_ts.rs @@ -0,0 +1,393 @@ +use std::ops::Range; + +use itertools::Itertools; +use raphtory_api::core::entities::LayerId; +use raphtory_core::{ + entities::{ELID, LayerIds, layers::Multiple}, + storage::timeindex::{EventTime, TimeIndexOps}, +}; + +use crate::{NodeEntryRef, segments::additions::MemAdditions, utils::Iter3}; + +#[derive(Clone, Debug)] +pub enum LayerIter<'a> { + One(LayerId), + LRef(&'a LayerIds), + Multiple(Multiple), +} + +pub static ALL_LAYERS: LayerIter<'static> = LayerIter::LRef(&LayerIds::All); +pub static NONE_LAYERS: LayerIter<'static> = LayerIter::LRef(&LayerIds::None); + +impl<'a> LayerIter<'a> { + pub fn into_iter(self, num_layers: usize) -> impl Iterator + Send + Sync + 'a { + match self { + LayerIter::One(id) => Iter3::I(std::iter::once(id)), + LayerIter::LRef(layers) => Iter3::J(layers.iter(num_layers)), + LayerIter::Multiple(ids) => Iter3::K(ids.into_iter()), + } + } +} + +impl From for LayerIter<'_> { + fn from(id: usize) -> Self { + LayerIter::One(LayerId(id)) + } +} + +impl From for LayerIter<'_> { + fn from(id: LayerId) -> Self { + LayerIter::One(id) + } +} + +impl<'a> From<&'a LayerIds> for LayerIter<'a> { + fn from(layers: &'a LayerIds) -> Self { + LayerIter::LRef(layers) + } +} + +#[derive(Clone, Debug)] +pub struct GenericTimeOps<'a, Ref> { + range: Option<(EventTime, EventTime)>, + layer_id: LayerIter<'a>, + item_ref: Ref, +} + +impl<'a, Ref> GenericTimeOps<'a, Ref> { + pub fn new_with_layer(node: Ref, layer_id: impl Into>) -> Self { + Self { + range: None, + layer_id: layer_id.into(), + item_ref: node, + } + } +} + +pub trait WithTimeCells<'a>: Copy + Clone + Send + Sync + std::fmt::Debug +where + Self: 'a, +{ + type TimeCell: TimeIndexOps<'a, IndexType = EventTime>; + + fn t_props_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + Send + Sync + 'a; + + fn additions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + Send + Sync + 'a; + + fn deletions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + Send + Sync + 'a; + + fn num_layers(&self) -> usize; +} + +pub trait WithEdgeEvents<'a>: WithTimeCells<'a> { + type TimeCell: EdgeEventOps<'a>; +} + +impl<'a> WithEdgeEvents<'a> for NodeEntryRef<'a> { + type TimeCell = MemAdditions<'a>; +} + +pub trait EdgeEventOps<'a>: TimeIndexOps<'a, IndexType = EventTime> { + fn edge_events(self) -> impl Iterator + Send + Sync + 'a; + fn edge_events_rev(self) -> impl Iterator + Send + Sync + 'a; +} + +#[derive(Clone, Copy, Debug)] +pub struct AdditionCellsRef<'a, Ref: WithTimeCells<'a> + 'a> { + node: Ref, + _mark: std::marker::PhantomData<&'a ()>, +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> AdditionCellsRef<'a, Ref> { + pub fn new(node: Ref) -> Self { + Self { + node, + _mark: 
std::marker::PhantomData, + } + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> WithTimeCells<'a> for AdditionCellsRef<'a, Ref> { + type TimeCell = Ref::TimeCell; + + fn t_props_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.node.t_props_tc(layer_id, range) // Assuming t_props_tc is not used for additions + } + + fn additions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn deletions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn num_layers(&self) -> usize { + self.node.num_layers() + } +} + +#[derive(Clone, Copy, Debug)] +pub struct DeletionCellsRef<'a, Ref: WithTimeCells<'a> + 'a> { + node: Ref, + _mark: std::marker::PhantomData<&'a ()>, +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> DeletionCellsRef<'a, Ref> { + pub fn new(node: Ref) -> Self { + Self { + node, + _mark: std::marker::PhantomData, + } + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> WithTimeCells<'a> for DeletionCellsRef<'a, Ref> { + type TimeCell = Ref::TimeCell; + + fn t_props_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn additions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn deletions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.node.deletions_tc(layer_id, range) + } + + fn num_layers(&self) -> usize { + self.node.num_layers() + } +} + +#[derive(Clone, Copy, Debug)] +pub struct EdgeAdditionCellsRef<'a, Ref: WithTimeCells<'a> + 'a> { + node: Ref, + _mark: std::marker::PhantomData<&'a ()>, +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> EdgeAdditionCellsRef<'a, Ref> { + pub fn new(node: Ref) -> Self { + Self { + node, + _mark: std::marker::PhantomData, + } + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> WithTimeCells<'a> for EdgeAdditionCellsRef<'a, Ref> { + type TimeCell = Ref::TimeCell; + + fn t_props_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn additions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.node.additions_tc(layer_id, range) + } + + fn deletions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn num_layers(&self) -> usize { + self.node.num_layers() + } +} + +#[derive(Clone, Copy, Debug)] +pub struct PropAdditionCellsRef<'a, Ref: WithTimeCells<'a> + 'a> { + node: Ref, + _mark: std::marker::PhantomData<&'a ()>, +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> PropAdditionCellsRef<'a, Ref> { + pub fn new(node: Ref) -> Self { + Self { + node, + _mark: std::marker::PhantomData, + } + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> WithTimeCells<'a> for PropAdditionCellsRef<'a, Ref> { + type TimeCell = Ref::TimeCell; + + fn t_props_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.node.t_props_tc(layer_id, range) + } + + fn additions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn deletions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl 
Iterator + 'a { + std::iter::empty() + } + + fn num_layers(&self) -> usize { + self.node.num_layers() + } +} + +impl<'a, Ref: WithEdgeEvents<'a> + 'a> GenericTimeOps<'a, EdgeAdditionCellsRef<'a, Ref>> +where + >::TimeCell: EdgeEventOps<'a>, +{ + pub fn edge_events(self) -> impl Iterator + Send + Sync + 'a { + self.layer_id + .into_iter(self.item_ref.num_layers()) + .flat_map(move |layer_id| { + self.item_ref + .additions_tc(layer_id, self.range) + .map(|t_cell| t_cell.edge_events()) + }) + .kmerge_by(|a, b| a < b) + } + + pub fn edge_events_rev(self) -> impl Iterator + Send + Sync + 'a { + self.layer_id + .into_iter(self.item_ref.num_layers()) + .flat_map(|layer_id| { + self.item_ref + .additions_tc(layer_id, self.range) + .map(|t_cell| t_cell.edge_events_rev()) + }) + .kmerge_by(|a, b| a > b) + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> GenericTimeOps<'a, Ref> { + pub fn time_cells(self) -> impl Iterator + Send + Sync + 'a { + let range = self.range; + self.layer_id + .into_iter(self.item_ref.num_layers()) + .flat_map(move |layer_id| { + self.item_ref.t_props_tc(layer_id, range).chain( + self.item_ref + .additions_tc(layer_id, range) + .chain(self.item_ref.deletions_tc(layer_id, range)), + ) + }) + } + + fn into_iter(self) -> impl Iterator + Send + Sync + 'a { + let iters = self.time_cells(); + iters.map(|cell| cell.iter()).kmerge() + } + + fn into_iter_rev(self) -> impl Iterator + Send + Sync + 'a { + let iters = self.time_cells(); + iters.map(|cell| cell.iter_rev()).kmerge_by(|a, b| a > b) + } +} + +impl<'a, Ref: WithTimeCells<'a> + 'a> TimeIndexOps<'a> for GenericTimeOps<'a, Ref> { + type IndexType = EventTime; + + type RangeType = Self; + + fn active(&self, w: Range) -> bool { + self.clone() + .time_cells() + .any(|t_cell| t_cell.active(w.clone())) + } + + fn range(&self, w: Range) -> Self::RangeType { + GenericTimeOps { + range: Some((w.start, w.end)), + item_ref: self.item_ref, + layer_id: self.layer_id.clone(), + } + } + + fn first(&self) -> Option { + Iterator::min( + self.clone() + .time_cells() + .filter_map(|t_cell| t_cell.first()), + ) + } + + fn last(&self) -> Option { + Iterator::max(self.clone().time_cells().filter_map(|t_cell| t_cell.last())) + } + + fn iter(self) -> impl Iterator + Send + Sync + 'a { + self.into_iter() + } + + fn iter_rev(self) -> impl Iterator + Send + Sync + 'a { + self.into_iter_rev() + } + + fn len(&self) -> usize { + self.clone().time_cells().map(|t_cell| t_cell.len()).sum() + } + + fn is_empty(&self) -> bool { + self.clone().time_cells().all(|t_cell| t_cell.is_empty()) + } +} diff --git a/db4-storage/src/generic_t_props.rs b/db4-storage/src/generic_t_props.rs new file mode 100644 index 0000000000..e2d96955e9 --- /dev/null +++ b/db4-storage/src/generic_t_props.rs @@ -0,0 +1,133 @@ +use std::{borrow::Borrow, ops::Range}; + +use either::Either; +use itertools::Itertools; +use raphtory_api::core::entities::{ + LayerId, + properties::{prop::Prop, tprop::TPropOps}, +}; +use raphtory_api_macros::box_on_debug_lifetime; +use raphtory_core::{entities::LayerIds, storage::timeindex::EventTime}; + +use crate::utils::Iter4; + +/// `WithTProps` defines behavior for types that store multiple temporal +/// properties either in memory or on disk. +/// +/// Used by `GenericTProps` to implement `TPropOps` for such types. 
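+///
+/// A minimal sketch of how an implementor is consumed, assuming `reference`
+/// implements `WithTProps` and `prop_id` is a resolved property id:
+///
+/// ```ignore
+/// let t_props = GenericTProps::new_with_layer(reference, LayerId(0), prop_id);
+/// // Yields `(EventTime, Prop)` pairs k-merged across the selected layers.
+/// for (t, prop) in t_props.iter_inner(None) {
+///     println!("{t:?} -> {prop:?}");
+/// }
+/// ```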
+pub trait WithTProps<'a>: Clone + Copy + Send + Sync +where + Self: 'a, +{ + type TProp: TPropOps<'a>; + + fn num_layers(&self) -> usize; + + fn into_t_props( + self, + layer_id: LayerId, + prop_id: usize, + ) -> impl Iterator + Send + Sync + 'a; + + #[box_on_debug_lifetime] + fn into_t_props_layers( + self, + layers: impl Borrow, + prop_id: usize, + ) -> impl Iterator + Send + Sync + 'a { + match layers.borrow() { + LayerIds::None => Iter4::I(std::iter::empty()), + LayerIds::One(layer_id) => Iter4::J(self.into_t_props(*layer_id, prop_id)), + LayerIds::All => Iter4::K( + (0..self.num_layers()) + .map(LayerId) + .flat_map(move |layer_id| self.into_t_props(layer_id, prop_id)), + ), + LayerIds::Multiple(layers) => Iter4::L( + layers + .clone() + .into_iter() + .flat_map(move |layer_id| self.into_t_props(layer_id, prop_id)), + ), + } + } +} + +/// A generic implementation of `TPropOps` that aggregates temporal properties +/// across storage. +/// +/// Wraps types implementing `WithTProps` (eg, `MemNodeRef`, `DiskNodeRef`) +/// to provide unified access to temporal properties. Also handles k-merging +/// temporal properties when queried. +#[derive(Clone, Copy)] +pub struct GenericTProps<'a, Ref: WithTProps<'a>> { + reference: Ref, + layer_id: Either<&'a LayerIds, LayerId>, + prop_id: usize, +} + +impl<'a, Ref: WithTProps<'a>> GenericTProps<'a, Ref> { + pub fn new(reference: Ref, layer_id: &'a LayerIds, prop_id: usize) -> Self { + Self { + reference, + layer_id: Either::Left(layer_id), + prop_id, + } + } + + pub fn new_with_layer(reference: Ref, layer_id: LayerId, prop_id: usize) -> Self { + Self { + reference, + layer_id: Either::Right(layer_id), + prop_id, + } + } +} + +impl<'a, Ref: WithTProps<'a>> GenericTProps<'a, Ref> { + #[box_on_debug_lifetime] + fn tprops(self, prop_id: usize) -> impl Iterator + Send + Sync + 'a { + match self.layer_id { + Either::Left(layer_ids) => { + Either::Left(self.reference.into_t_props_layers(layer_ids, prop_id)) + } + Either::Right(layer_id) => { + Either::Right(self.reference.into_t_props(layer_id, prop_id)) + } + } + } +} + +impl<'a, Ref: WithTProps<'a>> TPropOps<'a> for GenericTProps<'a, Ref> { + fn last_before(&self, t: EventTime) -> Option<(EventTime, Prop)> { + self.tprops(self.prop_id) + .filter_map(|t_props| t_props.last_before(t)) + .max_by_key(|(t, _)| *t) + } + + fn iter_inner( + self, + w: Option>, + ) -> impl Iterator + Send + Sync + 'a { + let tprops = self.tprops(self.prop_id); + tprops + .map(|t_prop| t_prop.iter_inner(w.clone())) + .kmerge_by(|(a, _), (b, _)| a < b) + } + + fn iter_inner_rev( + self, + w: Option>, + ) -> impl Iterator + Send + Sync + 'a { + let tprops = self + .tprops(self.prop_id) + .map(move |t_cell| t_cell.iter_inner_rev(w.clone())); + tprops.kmerge_by(|(a, _), (b, _)| a > b) + } + + fn at(&self, ti: &EventTime) -> Option { + self.tprops(self.prop_id) + .flat_map(|t_props| t_props.at(ti)) + .next() // TODO: need to figure out how to handle this + } +} diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs new file mode 100644 index 0000000000..623737957a --- /dev/null +++ b/db4-storage/src/lib.rs @@ -0,0 +1,215 @@ +use crate::{ + gen_ts::{ + AdditionCellsRef, DeletionCellsRef, EdgeAdditionCellsRef, GenericTimeOps, + PropAdditionCellsRef, + }, + generic_t_props::GenericTProps, + pages::{ + GraphStore, ReadLockedGraphStore, edge_store::ReadLockedEdgeStorage, + node_store::ReadLockedNodeStorage, + }, + persist::strategy::{NoOpStrategy, PersistenceStrategy}, + resolver::mapping_resolver::MappingResolver, + segments::{ + 
        edge::{
+            entry::{MemEdgeEntry, MemEdgeRef},
+            segment::EdgeSegmentView,
+        },
+        graph_prop::entry::{MemGraphPropEntry, MemGraphPropRef},
+        node::{
+            entry::{MemNodeEntry, MemNodeRef},
+            segment::NodeSegmentView,
+        },
+    },
+};
+use parking_lot::RwLock;
+use raphtory_api::core::entities::{EID, VID};
+use segments::{
+    edge::segment::MemEdgeSegment, graph_prop::GraphPropSegmentView, node::segment::MemNodeSegment,
+};
+use std::{
+    path::{Path, PathBuf},
+    thread,
+    time::Duration,
+};
+
+pub mod api;
+pub mod dir;
+pub mod gen_ts;
+pub mod generic_t_props;
+pub mod pages;
+pub mod persist;
+pub mod properties;
+pub mod resolver;
+pub mod segments;
+pub mod state;
+pub mod transaction;
+pub mod utils;
+pub mod wal;
+
+pub type Extension = NoOpStrategy;
+pub type NS<P> = NodeSegmentView<P>;
+pub type ES<P> = EdgeSegmentView<P>;
+pub type GS<P> = GraphPropSegmentView<P>;
+pub type Layer<P> = GraphStore<NS<P>, ES<P>, GS<P>, P>;
+pub type Wal<P> = <P as PersistenceStrategy>::Wal;
+pub type ControlFile<P> = <P as PersistenceStrategy>::ControlFile;
+pub type Config<P> = <P as PersistenceStrategy>::Config;
+pub type GIDResolver = MappingResolver;
+pub type ReadLockedLayer<P> = ReadLockedGraphStore<NS<P>, ES<P>, GS<P>, P>;
+pub type ReadLockedNodes<P> = ReadLockedNodeStorage<NS<P>, P>;
+pub type ReadLockedEdges<P> = ReadLockedEdgeStorage<ES<P>, P>;
+
+pub type NodeEntry<'a> = MemNodeEntry<'a, parking_lot::RwLockReadGuard<'a, MemNodeSegment>>;
+pub type EdgeEntry<'a> = MemEdgeEntry<'a, parking_lot::RwLockReadGuard<'a, MemEdgeSegment>>;
+pub type GraphPropEntry<'a> = MemGraphPropEntry<'a>;
+pub type NodeEntryRef<'a> = MemNodeRef<'a>;
+pub type EdgeEntryRef<'a> = MemEdgeRef<'a>;
+pub type GraphPropEntryRef<'a> = MemGraphPropRef<'a>;
+
+pub type NodePropAdditions<'a> = GenericTimeOps<'a, PropAdditionCellsRef<'a, MemNodeRef<'a>>>;
+pub type NodeEdgeAdditions<'a> = GenericTimeOps<'a, EdgeAdditionCellsRef<'a, MemNodeRef<'a>>>;
+
+pub type EdgeAdditions<'a> = GenericTimeOps<'a, AdditionCellsRef<'a, MemEdgeRef<'a>>>;
+pub type EdgeDeletions<'a> = GenericTimeOps<'a, DeletionCellsRef<'a, MemEdgeRef<'a>>>;
+
+pub type NodeTProps<'a> = GenericTProps<'a, MemNodeRef<'a>>;
+pub type EdgeTProps<'a> = GenericTProps<'a, MemEdgeRef<'a>>;
+pub type GraphTProps<'a> = GenericTProps<'a, MemGraphPropRef<'a>>;
+
+pub mod error {
+    use std::{io, panic::Location, path::PathBuf, sync::Arc};
+
+    use crate::resolver::mapping_resolver::InvalidNodeId;
+    use raphtory_api::core::{entities::properties::prop::PropError, utils::time::ParseTimeError};
+    use raphtory_core::entities::properties::props::MetadataError;
+
+    #[derive(thiserror::Error, Debug)]
+    pub enum StorageError {
+        #[error("External Storage Error {0}")]
+        External(#[from] Arc<dyn std::error::Error + Send + Sync>),
+        #[error("{source} at {location}")]
+        IO {
+            source: io::Error,
+            location: &'static Location<'static>,
+        },
+        #[error("Serde error: {0}")]
+        Serde(#[from] serde_json::Error),
+        #[error("Arrow-rs error: {0}")]
+        ArrowRS(#[from] arrow_schema::ArrowError),
+        #[error("Parquet error: {0}")]
+        Parquet(#[from] parquet::errors::ParquetError),
+        #[error(transparent)]
+        PropError(#[from] PropError),
+        #[error(transparent)]
+        MetadataError(#[from] MetadataError),
+        #[error("Empty Graph: {0}")]
+        EmptyGraphDir(PathBuf),
+        #[error("Failed to parse time string")]
+        ParseTime {
+            #[from]
+            source: ParseTimeError,
+        },
+        // #[error("Failed to mutate: {0}")]
+        // MutationError(#[from] MutationError),
+        #[error("Unnamed Failure: {0}")]
+        GenericFailure(String),
+        #[error(transparent)]
+        InvalidNodeId(#[from] InvalidNodeId),
+
+        #[error("Failed to vacuum storage")]
+        VacuumError,
+
+        #[error("Disk storage not supported")]
+        DiskStorageNotSupported,
+    }
+
+    impl StorageError {
+        pub fn from_external<E: std::error::Error + Send + Sync + 'static>(error: E) -> Self {
+            Self::External(Arc::new(error))
+        }
+    }
+
+    impl From<io::Error> for StorageError {
+        #[track_caller]
+        fn from(source: io::Error) -> Self {
+            let location = Location::caller();
+            StorageError::IO { source, location }
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize)]
+#[repr(transparent)]
+pub struct LocalPOS(pub u32);
+
+impl From<usize> for LocalPOS {
+    fn from(value: usize) -> Self {
+        assert!(value <= u32::MAX as usize);
+        LocalPOS(value as u32)
+    }
+}
+
+impl LocalPOS {
+    pub fn as_vid(self, page_id: usize, max_page_len: u32) -> VID {
+        VID(page_id * (max_page_len as usize) + (self.0 as usize))
+    }
+
+    pub fn as_eid(self, page_id: usize, max_page_len: u32) -> EID {
+        EID(page_id * (max_page_len as usize) + (self.0 as usize))
+    }
+
+    pub fn as_index(self) -> usize {
+        self.0 as usize
+    }
+}
+
+pub fn calculate_size_recursive(path: &Path) -> Result<usize, std::io::Error> {
+    let mut size = 0;
+    if path.is_dir() {
+        for entry in std::fs::read_dir(path)?
{ + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + size += calculate_size_recursive(&path)?; + } else { + size += path.metadata()?.len() as usize; + } + } + } else { + size += path.metadata()?.len() as usize; + } + Ok(size) +} + +pub fn collect_tree_paths(path: &Path) -> Vec { + let mut paths = Vec::new(); + if path.is_dir() { + for entry in std::fs::read_dir(path).unwrap() { + let entry = entry.unwrap(); + let entry_path = entry.path(); + if entry_path.is_dir() { + paths.extend(collect_tree_paths(&entry_path)); + } else { + paths.push(entry_path); + } + } + } else { + paths.push(path.to_path_buf()); + } + paths +} + +pub fn loop_lock_write(l: &RwLock) -> parking_lot::RwLockWriteGuard<'_, A> { + const MAX_BACKOFF_US: u64 = 1000; // 1ms max + let mut backoff_us = 1; + loop { + if let Some(guard) = l.try_write_for(Duration::from_micros(50)) { + return guard; + } + thread::park_timeout(Duration::from_micros(backoff_us)); + backoff_us = (backoff_us * 2).min(MAX_BACKOFF_US); + } +} diff --git a/db4-storage/src/loaders/mod.rs b/db4-storage/src/loaders/mod.rs new file mode 100644 index 0000000000..7a2c1f2f00 --- /dev/null +++ b/db4-storage/src/loaders/mod.rs @@ -0,0 +1,516 @@ +use crate::{EdgeSegmentOps, NodeSegmentOps, error::StorageError, pages::GraphStore}; +use arrow::buffer::ScalarBuffer; +use arrow_array::{ + Array, PrimitiveArray, RecordBatch, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, types::Int64Type, +}; +use arrow_csv::reader::Format; +use arrow_schema::{ArrowError, DataType, Schema, TimeUnit}; +use bytemuck::checked::cast_slice_mut; +use either::Either; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use raphtory_api::core::{ + entities::properties::prop::PropType, + storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, +}; +use raphtory_core::entities::{EID, VID, graph::logical_to_physical::Mapping}; +use rayon::prelude::*; +use std::{ + fs::File, + path::{Path, PathBuf}, + sync::{ + Arc, + atomic::{self, AtomicBool, AtomicUsize}, + }, +}; + +pub struct Loader<'a> { + path: PathBuf, + src_col: Either<&'a str, usize>, + dst_col: Either<&'a str, usize>, + time_col: Either<&'a str, usize>, + format: FileFormat, +} + +pub enum FileFormat { + CSV { + delimiter: u8, + has_header: bool, + sample_records: usize, + }, + Parquet, +} + +pub struct Rows { + rb: RecordBatch, + src: usize, + dst: usize, + t_properties: Vec, + t_indices: Vec, + time_col: ScalarBuffer, +} + +impl Rows { + pub fn srcs(&self) -> Result { + let arr = self.rb.column(self.src); + let arr = arr.as_ref(); + let srcs = NodeCol::try_from(arr)?; + Ok(srcs) + } + + pub fn dsts(&self) -> Result { + let arr = self.rb.column(self.dst); + let arr = arr.as_ref(); + let dsts = NodeCol::try_from(arr)?; + Ok(dsts) + } + + pub fn time(&self) -> &[i64] { + &self.time_col + } + + pub fn properties( + &self, + prop_id_resolver: impl Fn(&str, PropType) -> Result, StorageError>, + ) -> Result { + combine_properties_arrow( + &self.t_properties, + &self.t_indices, + self.rb.columns(), + prop_id_resolver, + ) + } + + fn new(rb: RecordBatch, src: usize, dst: usize, time: usize) -> Result { + let (t_indices, t_properties): (Vec<_>, Vec<_>) = rb + .schema() + .fields() + .iter() + .enumerate() + .filter_map(|(id, f)| { + if id == src || id == dst || id == time { + None + } else { + Some((id, f.name().to_owned())) + } + }) + .unzip(); + + let time_arr = rb.column(time); + let values = if let Some(arr) = time_arr + .as_any() + .downcast_ref::>() + { + 
arr.values().clone() + } else if let Some(arr) = time_arr.as_any().downcast_ref::() { + let arr_to_millis = + arrow::compute::cast(&arr, &DataType::Timestamp(TimeUnit::Millisecond, None))?; + let arr = arr_to_millis + .as_any() + .downcast_ref::() + .unwrap(); + arr.values().clone() + } else if let Some(arr) = time_arr + .as_any() + .downcast_ref::() + { + let arr_to_millis = + arrow::compute::cast(&arr, &DataType::Timestamp(TimeUnit::Millisecond, None))?; + let arr = arr_to_millis + .as_any() + .downcast_ref::() + .unwrap(); + arr.values().clone() + } else if let Some(arr) = time_arr + .as_any() + .downcast_ref::() + { + arr.values().clone() + } else { + return Err(StorageError::ArrowRS(ArrowError::CastError(format!( + "failed to cast time column {} to i64", + time_arr.data_type() + )))); + }; + + Ok(Self { + rb, + src, + dst, + t_indices, + t_properties, + time_col: values, + }) + } + + fn num_rows(&self) -> usize { + self.rb.num_rows() + } +} + +impl<'a> Loader<'a> { + pub fn new( + path: &Path, + src_col: Either<&'a str, usize>, + dst_col: Either<&'a str, usize>, + time_col: Either<&'a str, usize>, + format: FileFormat, + ) -> Result { + Ok(Self { + path: path.to_owned(), + src_col, + dst_col, + time_col, + format, + }) + } + + pub fn iter_file( + &self, + path: &Path, + rows_per_batch: usize, + ) -> Result> + Send>, StorageError> { + match &self.format { + FileFormat::CSV { + delimiter, + has_header, + sample_records, + } => { + let file = File::open(path).unwrap(); + let (schema, _) = Format::default() + .with_header(*has_header) + .with_delimiter(*delimiter) + .infer_schema(file, Some(*sample_records))?; + let schema = Arc::new(schema); + + let (src, dst, time) = self.src_dst_time_cols(&schema)?; + + let file = File::open(path)?; + + let reader = arrow_csv::reader::ReaderBuilder::new(schema.clone()) + .with_header(*has_header) + .with_delimiter(*delimiter) + .with_batch_size(rows_per_batch) + .build(file)?; + Ok(Box::new(reader.map(move |rb| { + rb.map_err(StorageError::from) + .and_then(|rb| Rows::new(rb, src, dst, time)) + }))) + } + FileFormat::Parquet => { + let file = File::open(path)?; + let builder = + ParquetRecordBatchReaderBuilder::try_new(file)?.with_batch_size(rows_per_batch); + + let (src, dst, time) = self.src_dst_time_cols(&builder.schema())?; + let reader = builder.build()?; + Ok(Box::new(reader.map(move |rb| { + rb.map_err(StorageError::from) + .and_then(|rb| Rows::new(rb, src, dst, time)) + }))) + } + } + } + + pub fn iter( + &self, + rows_per_batch: usize, + ) -> Result> + Send>, StorageError> { + if self.path.is_dir() { + let mut files = vec![]; + for entry in std::fs::read_dir(&self.path)? { + let entry = entry?; + if entry.file_type()?.is_file() { + files.push(entry.path()); + } + } + let iterators: Vec<_> = files + .into_iter() + .map(|path| self.iter_file(&path, rows_per_batch)) + .collect::, _>>()?; + Ok(Box::new(iterators.into_iter().flatten())) + } else { + Ok(self.iter_file(&self.path, rows_per_batch)?) 
+ } + } + + fn src_dst_time_cols(&self, schema: &Schema) -> Result<(usize, usize, usize), StorageError> { + let src_field = match self.src_col { + Either::Left(name) => schema.index_of(name)?, + Either::Right(idx) => idx, + }; + let dst_field = match self.dst_col { + Either::Left(name) => schema.index_of(name)?, + Either::Right(idx) => idx, + }; + + let time_field = match self.time_col { + Either::Left(name) => schema.index_of(name)?, + Either::Right(idx) => idx, + }; + + Ok((src_field, dst_field, time_field)) + } + + pub fn load_into< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + EXT: Clone + Default + Send + Sync, + >( + &self, + graph: &GraphStore, + rows_per_batch: usize, + ) -> Result { + let mut src_col_resolved: Vec = vec![]; + let mut dst_col_resolved: Vec = vec![]; + let mut eid_col_resolved: Vec = vec![]; + let mut eids_exist: Vec = vec![]; // exists or needs to be created + + let max_edge_id = AtomicUsize::new(graph.edges().num_edges().saturating_sub(1)); + + let resolver = Mapping::new(); + + let next_id = AtomicUsize::new(0); + let mut offset = 0; + + let now = std::time::Instant::now(); + for chunk in self.iter(rows_per_batch)? { + let now_chunk = std::time::Instant::now(); + let rb = chunk?; + + let props = rb.properties(|name, p_type| { + graph + .edge_meta() + .resolve_prop_id(name, p_type, false) + .map_err(StorageError::from) + })?; + + let srcs = rb.srcs()?; + let dsts = rb.dsts()?; + + src_col_resolved.resize_with(rb.num_rows(), Default::default); + srcs.par_iter() + .zip(src_col_resolved.par_iter_mut()) + .try_for_each(|(gid, resolved)| { + let gid = gid.ok_or_else(|| LoadError::MissingSrcError)?; + let id = resolver + .get_or_init(gid, || VID(next_id.fetch_add(1, atomic::Ordering::Relaxed))) + .unwrap() + .inner(); + *resolved = id; + Ok::<(), StorageError>(()) + })?; + + dst_col_resolved.resize_with(rb.num_rows(), Default::default); + dsts.par_iter() + .zip(dst_col_resolved.par_iter_mut()) + .try_for_each(|(gid, resolved)| { + let gid = gid.ok_or_else(|| LoadError::MissingDstError)?; + let id = resolver + .get_or_init(gid, || VID(next_id.fetch_add(1, atomic::Ordering::Relaxed))) + .unwrap() + .inner(); + *resolved = id; + Ok::<(), StorageError>(()) + })?; + + eid_col_resolved.resize_with(rb.num_rows(), Default::default); + eids_exist.resize_with(rb.num_rows(), Default::default); + let eid_col_shared = atomic_usize_from_mut_slice(cast_slice_mut(&mut eid_col_resolved)); + + let num_pages = + next_id.load(atomic::Ordering::Relaxed) / graph.nodes().max_page_len() + 1; + graph.nodes().grow(num_pages); + + let mut node_writers = graph.nodes().locked(); + + node_writers.par_iter_mut().try_for_each(|locked_page| { + for (row, (&src, &dst)) in src_col_resolved + .iter() + .zip(dst_col_resolved.iter()) + .enumerate() + { + if let Some(src_pos) = locked_page.resolve_pos(src) { + let mut writer = locked_page.writer(); + if let Some(edge_id) = writer.get_out_edge(src_pos, dst) { + eid_col_shared[row].store(edge_id.0, atomic::Ordering::Relaxed); + eids_exist[row].store(true, atomic::Ordering::Relaxed); + } else { + let edge_id = EID(max_edge_id.fetch_add(1, atomic::Ordering::Relaxed)); + writer.add_outbound_edge(0, src_pos, dst, edge_id.with_layer(0), 0); // FIXME: when we update this to work with layers use the correct layer + eid_col_shared[row].store(edge_id.0, atomic::Ordering::Relaxed); + eids_exist[row].store(false, atomic::Ordering::Relaxed); + } + } + } + + Ok::<_, StorageError>(()) + })?; + + node_writers.par_iter_mut().try_for_each(|locked_page| { + for (&edge_id, 
(&src, &dst)) in eid_col_resolved + .iter() + .zip(src_col_resolved.iter().zip(&dst_col_resolved)) + { + if let Some(dst_pos) = locked_page.resolve_pos(dst) { + let mut writer = locked_page.writer(); + if !writer.get_inb_edge(dst_pos, src).is_some() { + let edge_id = EID(edge_id.0); + writer.add_inbound_edge(0, dst_pos, src, edge_id.with_layer(0), 0); // FIXME: when we update this to work with layers use the correct layer + } + } + } + + Ok::<_, StorageError>(()) + })?; + + // now edges + + let num_pages = + max_edge_id.load(atomic::Ordering::Relaxed) / graph.edges().max_page_len() + 1; + + graph.edges().grow(num_pages); + + let mut edge_writers = graph.edges().locked(); + + let time_col = rb.time(); + + edge_writers.iter_mut().for_each(|edge_writer| { + for (row_idx, ((((&src, &dst), &eid), edge_exists), time)) in src_col_resolved + .iter() + .zip(&dst_col_resolved) + .zip(&eid_col_resolved) + .zip( + eids_exist + .iter() + .map(|exists| exists.load(atomic::Ordering::Relaxed)), + ) + .zip(time_col) + .enumerate() + { + if let Some(local_pos) = edge_writer.resolve_pos(eid) { + let mut writer = edge_writer.writer(); + let time = TimeIndexEntry::new(*time, offset + row_idx); + writer.add_edge( + time, + Some(local_pos), + src, + dst, + props.iter_row(row_idx), + 0, + Some(edge_exists), + ); + } + } + }); + + src_col_resolved.clear(); + dst_col_resolved.clear(); + eid_col_resolved.clear(); + eids_exist.clear(); + offset += rb.num_rows(); + } + + Ok(resolver) + } +} + +#[cfg(test)] +mod test { + use crate::{Layer, pages::test_utils::check_load_support}; + use proptest::{collection::vec, prelude::*}; + + fn check_load(edges: &[(i64, u64, u64)], max_page_len: usize) { + check_load_support(edges, false, |path| { + Layer::<()>::new(path, max_page_len, max_page_len) + }); + } + + #[test] + fn test_one_edge() { + check_load(&[(0, 0, 1)], 32); + } + + #[test] + fn test_load_graph_from_csv() { + let edge_strat = (1u64..100).prop_flat_map(|num_nodes| { + (1usize..100).prop_flat_map(move |num_edges| { + vec(((0i64..100), (0..num_nodes), (0..num_nodes)), num_edges) + }) + }); + + proptest!(|(edges in edge_strat, max_page_len in 1usize .. 
100)| { + check_load(&edges, max_page_len); + }); + } + + #[test] + fn test_load_graph_from_csv_5() { + let edges = [ + (42, 16, 24), + (96, 41, 8), + (37, 9, 9), + (62, 37, 57), + (12, 49, 23), + (8, 60, 44), + (56, 35, 0), + (9, 48, 58), + (59, 20, 37), + (36, 17, 46), + ]; + let max_page_len = 7; + check_load(&edges, max_page_len); + } + + #[test] + fn test_load_graph_from_csv_4() { + let edges = [ + (27, 20, 85), + (2, 29, 77), + (55, 59, 22), + (72, 47, 73), + (26, 66, 36), + (22, 39, 37), + (5, 49, 88), + (2, 48, 13), + (97, 23, 57), + ]; + let max_page_len = 8; + check_load(&edges, max_page_len); + } + + #[test] + fn test_load_graph_from_csv_1() { + let edges = [(0, 33, 31), (1, 12, 20), (2, 22, 32)]; + + check_load(&edges, 32); + } + + #[test] + fn test_load_graph_from_csv_2() { + let edges = [ + (0, 23, 61), + (1, 52, 14), + (2, 62, 62), + (3, 13, 9), + (4, 29, 6), + (5, 13, 7), + ]; + + check_load(&edges, 5); + } + + #[test] + fn test_load_graph_from_csv_3() { + let edges = [(0, 0, 32)]; + + check_load(&edges, 51); + } + + #[test] + fn test_edges_1() { + let edges = [(0, 1, 0), (0, 0, 0), (0, 0, 0)]; + + check_load(&edges, 32); + } +} diff --git a/db4-storage/src/pages/edge_page/mod.rs b/db4-storage/src/pages/edge_page/mod.rs new file mode 100644 index 0000000000..d3baa81782 --- /dev/null +++ b/db4-storage/src/pages/edge_page/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs new file mode 100644 index 0000000000..87110e321e --- /dev/null +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -0,0 +1,240 @@ +use crate::{ + LocalPOS, + api::edges::EdgeSegmentOps, + error::StorageError, + pages::{layer_counter::GraphStats, resolve_pos}, + segments::edge::segment::MemEdgeSegment, + wal::LSN, +}; +use raphtory_api::core::entities::{ + EID, LayerId, VID, + properties::{ + meta::STATIC_GRAPH_LAYER_ID, + prop::{AsPropRef, Prop}, + }, +}; +use raphtory_core::storage::timeindex::{AsTime, EventTime}; +use std::ops::DerefMut; + +pub struct EdgeWriter< + 'a, + MP: DerefMut + std::fmt::Debug, + ES: EdgeSegmentOps, +> { + pub page: &'a ES, + pub writer: MP, + pub graph_stats: &'a GraphStats, + old_estimated_size: usize, +} + +impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmentOps> + EdgeWriter<'a, MP, ES> +{ + pub fn new(global_num_edges: &'a GraphStats, page: &'a ES, writer: MP) -> Self { + let old_estimated_size = writer.est_size(); + Self { + page, + writer, + graph_stats: global_num_edges, + old_estimated_size, + } + } + + fn new_local_pos(&self, layer_id: LayerId) -> LocalPOS { + let new_pos = LocalPOS(self.page.increment_num_edges()); + self.increment_layer_num_edges(layer_id); + new_pos + } + + pub fn add_edge( + &mut self, + t: T, + edge_pos: LocalPOS, + src: VID, + dst: VID, + props: impl IntoIterator, + layer_id: LayerId, + ) -> LocalPOS { + self.graph_stats.update_time(t.t()); + if self + .writer + .insert_edge_internal(t, edge_pos, src, dst, layer_id, props) + && !self.page.immut_has_edge(edge_pos, layer_id) + { + // edge is new to this writer and also the immutable part of the segment + self.increment_layer_num_edges(layer_id); + } + + edge_pos + } + + pub fn delete_edge( + &mut self, + t: T, + edge_pos: LocalPOS, + src: VID, + dst: VID, + layer_id: LayerId, + ) { + self.graph_stats.update_time(t.t()); + if self + .writer + .delete_edge_internal(t, edge_pos, src, dst, layer_id) + && !self.page.immut_has_edge(edge_pos, layer_id) + { + // edge is new to this writer and also the immutable part of
the segment + self.increment_layer_num_edges(layer_id); + } + } + + /// Adds a static edge to the graph. + /// + /// If `edge_pos` is `None`, a new position is allocated. If `Some`, the provided position + /// is used. + /// Set `already_counted` to `true` when bulk loading to avoid double-counting statistics. + pub fn add_static_edge( + &mut self, + edge_pos: Option, + src: impl Into, + dst: impl Into, + already_counted: bool, + ) -> LocalPOS { + if edge_pos.is_some() && !already_counted { + self.page.increment_num_edges(); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); + } + + let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(STATIC_GRAPH_LAYER_ID)); + self.writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); + + edge_pos + } + + pub fn bulk_add_edge( + &mut self, + t: EventTime, + edge_pos: LocalPOS, + src: VID, + dst: VID, + edge_exists: bool, + layer_id: LayerId, + c_props: impl IntoIterator, + t_props: impl IntoIterator, + ) { + if !edge_exists { + if self + .writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID) + { + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); + } + } + + if self + .writer + .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props) + && !self.page.immut_has_edge(edge_pos, layer_id) + { + self.increment_layer_num_edges(layer_id); + } + + self.graph_stats.update_time(t.t()); + + self.writer + .update_const_properties(edge_pos, src, dst, layer_id, c_props); + } + + pub fn bulk_delete_edge( + &mut self, + t: EventTime, + edge_pos: LocalPOS, + src: VID, + dst: VID, + exists: bool, + layer_id: LayerId, + ) { + if !exists { + if self + .writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID) + { + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); + } + } + + self.graph_stats.update_time(t.t()); + if self + .writer + .delete_edge_internal(t, edge_pos, src, dst, layer_id) + && !self.page.immut_has_edge(edge_pos, layer_id) + { + self.increment_layer_num_edges(layer_id); + } + } + + pub fn segment_id(&self) -> usize { + self.page.segment_id() + } + + fn increment_layer_num_edges(&self, layer_id: LayerId) { + self.graph_stats.increment(layer_id); + } + + pub fn get_edge(&self, layer_id: LayerId, edge_pos: LocalPOS) -> Option<(VID, VID)> { + self.page.get_edge(edge_pos, layer_id, self.writer.deref()) + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.writer.set_lsn(lsn); + } + + pub fn check_metadata( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + props: &[(usize, P)], + ) -> Result<(), StorageError> { + self.writer.check_metadata(edge_pos, layer_id, props) + } + + pub fn update_c_props( + &mut self, + edge_pos: LocalPOS, + src: VID, + dst: VID, + layer_id: LayerId, + props: impl IntoIterator, + ) { + let existing_edge = self.page.has_edge(edge_pos, layer_id, self.writer.deref()); + + if !existing_edge { + self.increment_layer_num_edges(layer_id); + } + self.writer + .update_const_properties(edge_pos, src, dst, layer_id, props); + } + + #[inline(always)] + pub fn resolve_pos(&self, edge_id: EID) -> Option { + let (page, pos) = resolve_pos(edge_id, self.writer.max_page_len()); + + if page == self.page.segment_id() { + Some(pos) + } else { + None + } + } +} + +impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmentOps> Drop + for EdgeWriter<'a, MP, ES> +{ + fn drop(&mut self) { + let delta = self.writer.est_size() - self.old_estimated_size; + self.writer.increment_global_memory(delta); + if let Err(err) = 
self.page.notify_write(self.writer.deref_mut()) { + eprintln!("Failed to persist {}, err: {}", self.segment_id(), err) + } + } +} diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs new file mode 100644 index 0000000000..9aba63b807 --- /dev/null +++ b/db4-storage/src/pages/edge_store.rs @@ -0,0 +1,657 @@ +use super::{edge_page::writer::EdgeWriter, resolve_pos}; +use crate::{ + LocalPOS, + api::edges::{EdgeRefOps, EdgeSegmentOps, LockedESegment}, + error::StorageError, + pages::{ + SegmentCounts, + layer_counter::GraphStats, + locked::edges::{LockedEdgePage, WriteLockedEdgePages}, + row_group_par_iter, + }, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, + segments::edge::segment::MemEdgeSegment, +}; +use parking_lot::{RwLock, RwLockWriteGuard}; +use raphtory_api::core::entities::{ + EID, LayerId, VID, + properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, +}; +use raphtory_core::{ + entities::{ELID, LayerIds}, + storage::timeindex::{AsTime, EventTime}, +}; +use rayon::prelude::*; +use std::{ + collections::HashMap, + ops::Deref, + path::{Path, PathBuf}, + sync::{Arc, LazyLock}, +}; + +pub static N: LazyLock = LazyLock::new(|| rayon::current_num_threads()); + +#[derive(Debug)] +pub struct EdgeStorageInner { + segments: boxcar::Vec>, + layer_counter: Arc, + free_pages: Box<[RwLock]>, + edges_path: Option, + prop_meta: Arc, + ext: EXT, +} + +#[derive(Debug)] +pub struct ReadLockedEdgeStorage, EXT> { + storage: Arc>, + locked_pages: Box<[ES::ArcLockedSegment]>, +} + +impl, EXT: PersistenceStrategy> + ReadLockedEdgeStorage +{ + pub fn storage(&self) -> &EdgeStorageInner { + &self.storage + } + + pub fn edge_ref( + &self, + e_id: impl Into, + ) -> <::ArcLockedSegment as LockedESegment>::EntryRef<'_> { + let e_id = e_id.into(); + let (page_id, pos) = self.storage.resolve_pos(e_id); + let locked_page = &self.locked_pages[page_id]; + locked_page.entry_ref(pos) + } + + pub fn iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl Iterator< + Item = <::ArcLockedSegment as LockedESegment>::EntryRef<'a>, + > + 'a { + self.locked_pages + .iter() + .flat_map(move |page| page.edge_iter(layer_ids)) + } + + pub fn par_iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl ParallelIterator< + Item = <::ArcLockedSegment as LockedESegment>::EntryRef<'a>, + > + 'a { + self.locked_pages + .par_iter() + .flat_map(move |page| page.edge_par_iter(layer_ids)) + } + + /// Returns an iterator over the segments of the edge store, where each segment is + /// a tuple of the segment index and an iterator over the entries in that segment. 
+ pub fn segmented_par_iter( + &self, + ) -> impl ParallelIterator)> + '_ { + self.locked_pages + .par_iter() + .enumerate() + .map(move |(segment_id, page)| { + ( + segment_id, + page.edge_iter(&LayerIds::All).map(|e| e.edge_id()), + ) + }) + } + + pub fn row_groups_par_iter( + &self, + ) -> impl IndexedParallelIterator + '_)> { + let max_actual_seg_len = self + .storage + .segments + .iter() + .map(|(_, seg)| seg.num_edges()) + .max() + .unwrap_or(0); + let max_seg_len = self.storage.max_page_len(); + row_group_par_iter( + max_seg_len as usize, + self.locked_pages.len(), + max_seg_len, + max_actual_seg_len, + ) + .map(|(row_group_id, iter)| { + ( + row_group_id, + iter.filter(|eid| self.edge_ref(*eid).edge(LayerId(0)).is_some()), + ) + }) + } + + pub fn segment_counts(&self) -> SegmentCounts { + SegmentCounts::new( + self.storage.max_page_len(), + self.locked_pages.iter().map(|seg| seg.num_edges()), + ) + } +} + +impl, EXT: PersistenceStrategy> + EdgeStorageInner +{ + pub fn locked(self: &Arc) -> ReadLockedEdgeStorage { + let locked_pages = self + .segments + .iter() + .map(|(_, segment)| segment.locked()) + .collect::>(); + ReadLockedEdgeStorage { + storage: self.clone(), + locked_pages, + } + } + + pub fn edge_meta(&self) -> &Arc { + &self.prop_meta + } + + pub fn stats(&self) -> &Arc { + &self.layer_counter + } + + pub fn segments(&self) -> &boxcar::Vec> { + &self.segments + } + + pub fn num_segments(&self) -> usize { + self.segments.count() + } + + pub fn new_with_meta(edges_path: Option, edge_meta: Arc, ext: EXT) -> Self { + let free_pages = (0..(*N)).map(RwLock::new).collect::>(); + let empty = Self { + segments: boxcar::Vec::new(), + layer_counter: GraphStats::new().into(), + free_pages: free_pages.try_into().unwrap(), + edges_path, + prop_meta: edge_meta, + ext, + }; + let layer_mapper = empty.edge_meta().layer_meta(); + let prop_mapper = empty.edge_meta().temporal_prop_mapper(); + let metadata_mapper = empty.edge_meta().metadata_mapper(); + + if layer_mapper.num_fields() > 0 + || prop_mapper.num_fields() > 0 + || metadata_mapper.num_fields() > 0 + { + let segment = empty.get_or_create_segment(STATIC_GRAPH_LAYER_ID.0); + let mut head = segment.head_mut(); + + for layer in layer_mapper.ids() { + head.get_or_create_layer(LayerId(layer)); + } + + if prop_mapper.num_fields() > 0 { + head.get_or_create_layer(LayerId(0)) + .properties_mut() + .set_has_properties() + } + + segment.set_dirty(true); + } + empty + } + + pub fn new(edges_path: Option, ext: EXT) -> Self { + Self::new_with_meta(edges_path, Meta::new_for_edges().into(), ext) + } + + pub fn pages(&self) -> &boxcar::Vec> { + &self.segments + } + + pub fn edges_path(&self) -> Option<&Path> { + self.edges_path.as_ref().map(|path| path.as_path()) + } + + pub fn earliest(&self) -> Option { + Iterator::min(self.segments.iter().filter_map(|(_, page)| page.earliest())) + // see : https://github.com/rust-lang/rust-analyzer/issues/10653 + } + + pub fn latest(&self) -> Option { + Iterator::max(self.segments.iter().filter_map(|(_, page)| page.latest())) + } + + pub fn t_len(&self) -> usize { + self.segments.iter().map(|(_, page)| page.t_len()).sum() + } + + pub fn prop_meta(&self) -> &Arc { + &self.prop_meta + } + + #[inline(always)] + pub fn resolve_pos(&self, e_id: EID) -> (usize, LocalPOS) { + resolve_pos(e_id, self.max_page_len()) + } + + pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { + let edges_path = edges_path.as_ref(); + let max_page_len = ext.config().max_edge_page_len(); + + let meta = Arc::new(Meta::new_for_edges()); 
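// Layout assumption: `edges_path` holds one subdirectory per segment, named
// by its numeric segment id (e.g. edges/0, edges/1, ...). The scan below
// skips anything that is not a directory with a parseable id, and any ids
// missing from 0..=max are backfilled with empty segments so that
// resolve_pos can map an EID to (segment, offset) by simple division.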
+ + if !edges_path.exists() { + return Ok(Self::new(Some(edges_path.to_path_buf()), ext.clone())); + } + + let mut pages = std::fs::read_dir(edges_path)? + .par_bridge() + .filter(|entry| { + entry + .as_ref() + .ok() + .and_then(|entry| entry.file_type().ok().map(|ft| ft.is_dir())) + .unwrap_or_default() + }) + .filter_map(|entry| { + let entry = entry.ok()?; + let page_id = entry + .path() + .file_stem() + .and_then(|name| name.to_str().and_then(|name| name.parse::().ok()))?; + let page = ES::load(page_id, max_page_len, meta.clone(), edges_path, ext.clone()) + .map(|page| (page_id, page)); + + Some(page) + }) + .collect::, _>>()?; + + if pages.is_empty() { + return Err(StorageError::EmptyGraphDir(edges_path.to_path_buf())); + } + + let max_page = Iterator::max(pages.keys().copied()).unwrap(); + + let pages: boxcar::Vec> = (0..=max_page) + .map(|page_id| { + let np = pages.remove(&page_id).unwrap_or_else(|| { + ES::new( + page_id, + meta.clone(), + Some(edges_path.to_path_buf()), + ext.clone(), + ) + }); + Arc::new(np) + }) + .collect::>(); + + let first_page = pages.iter().next().unwrap().1; + let first_p_id = first_page.segment_id(); + + if first_p_id != 0 { + return Err(StorageError::GenericFailure(format!( + "First page id is not 0 in {edges_path:?}" + ))); + } + + let mut free_pages = pages + .iter() + .filter_map(|(_, page)| { + let len = page.num_edges(); + if len < max_page_len { + Some(RwLock::new(page.segment_id())) + } else { + None + } + }) + .collect::>(); + + let mut next_free_page = free_pages + .last() + .map(|page| *(page.read())) + .map(|last| last + 1) + .unwrap_or_else(|| pages.count()); + + free_pages.resize_with(*N, || { + let lock = RwLock::new(next_free_page); + next_free_page += 1; + lock + }); + + let mut layer_counts = vec![]; + + for (_, page) in pages.iter() { + for layer_id in 0..page.num_layers() { + let count = page.layer_count(LayerId(layer_id)) as usize; + if layer_counts.len() <= layer_id { + layer_counts.resize(layer_id + 1, 0); + } + layer_counts[layer_id] += count; + } + } + + let earliest = pages + .iter() + .filter_map(|(_, page)| page.earliest().filter(|t| t.t() != i64::MAX)) + .map(|t| t.t()) + .min() + .unwrap_or(i64::MAX); + + let latest = pages + .iter() + .filter_map(|(_, page)| page.latest().filter(|t| t.t() != i64::MIN)) + .map(|t| t.t()) + .max() + .unwrap_or(i64::MIN); + + let stats = GraphStats::load(layer_counts, earliest, latest); + + Ok(Self { + segments: pages, + edges_path: Some(edges_path.to_path_buf()), + layer_counter: stats.into(), + free_pages: free_pages.try_into().unwrap(), + prop_meta: meta, + ext, + }) + } + + pub fn grow(&self, size: usize) { + self.get_or_create_segment(size - 1); + } + + pub fn push_new_page(&self) -> usize { + let segment_id = self.segments.push_with(|segment_id| { + Arc::new(ES::new( + segment_id, + self.prop_meta.clone(), + self.edges_path.clone(), + self.ext.clone(), + )) + }); + + while self.segments.get(segment_id).is_none() { + // wait + } + + segment_id + } + + pub fn increment_edge_segment_count(&self, eid: EID) { + let (segment_id, _) = resolve_pos(eid, self.max_page_len()); + let segment = self.get_or_create_segment(segment_id); + segment.increment_num_edges(); + } + + pub fn get_or_create_segment(&self, segment_id: usize) -> &Arc { + if let Some(segment) = self.segments.get(segment_id) { + return segment; + } + + let count = self.segments.count(); + + if count > segment_id { + // Something has allocated the segment, wait for it to be added. 
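// count() can run ahead of element visibility in boxcar::Vec: a concurrent
// push may have bumped the count before the new segment is observable via
// get(), so the yield loop below simply waits for the creator to finish
// publishing it.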
+ loop { + if let Some(segment) = self.segments.get(segment_id) { + return segment; + } else { + // wait for the segment to be created + std::thread::yield_now(); + } + } + } else { + // We need to create the segment. + self.segments.reserve(segment_id + 1 - count); + + loop { + let new_segment_id = self.segments.push_with(|segment_id| { + Arc::new(ES::new( + segment_id, + self.prop_meta.clone(), + self.edges_path.clone(), + self.ext.clone(), + )) + }); + + if new_segment_id >= segment_id { + loop { + if let Some(segment) = self.segments.get(segment_id) { + return segment; + } else { + // Wait for the segment to be created. + std::thread::yield_now(); + } + } + } + } + } + } + + #[inline(always)] + pub fn max_page_len(&self) -> u32 { + self.ext.config().max_edge_page_len() + } + + pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { + WriteLockedEdgePages::new( + self.segments + .iter() + .map(|(page_id, page)| { + LockedEdgePage::new( + page_id, + self.max_page_len(), + page.as_ref(), + &self.layer_counter, + page.head_mut(), + ) + }) + .collect(), + ) + } + + /// Retrieve the segment for an edge given its EID + pub fn get_edge_segment(&self, eid: EID) -> Option<&Arc> { + let (segment_id, _) = resolve_pos(eid, self.max_page_len()); + self.segments.get(segment_id) + } + + pub fn get_edge(&self, e_id: ELID) -> Option<(VID, VID)> { + let layer = e_id.layer(); + let e_id = e_id.edge; + let (segment_id, local_edge) = resolve_pos(e_id, self.max_page_len()); + let segment = self.segments.get(segment_id)?; + segment.get_edge(local_edge, layer, segment.head()) + } + + pub fn edge(&self, e_id: impl Into) -> ES::Entry<'_> { + let e_id = e_id.into(); + let (segment_id, local_edge) = resolve_pos(e_id, self.max_page_len()); + let segment = self.segments.get(segment_id).unwrap_or_else(|| { + panic!( + "{e_id:?} Not found in seg: {segment_id}, pos: {local_edge:?}, num_segments: {}", + self.segments.count() + ) + }); + segment.entry(local_edge) + } + + pub fn num_edges(&self) -> usize { + self.layer_counter.get(LayerId(0)) + } + + pub fn num_edges_layer(&self, layer_id: LayerId) -> usize { + self.layer_counter.get(layer_id) + } + + pub fn get_writer<'a>( + &'a self, + e_id: EID, + ) -> EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES> { + let (chunk, _) = resolve_pos(e_id, self.max_page_len()); + let page = self.get_or_create_segment(chunk); + EdgeWriter::new(&self.layer_counter, page, page.head_mut()) + } + + pub fn try_get_writer<'a>( + &'a self, + e_id: EID, + ) -> Result, ES>, StorageError> { + let (segment_id, _) = resolve_pos(e_id, self.max_page_len()); + let page = self.get_or_create_segment(segment_id); + let writer = page.head_mut(); + Ok(EdgeWriter::new(&self.layer_counter, page, writer)) + } + + pub fn get_free_writer<'a>( + &'a self, + ) -> EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES> { + // optimistic first try to get a free page 3 times + let num_edges = self.num_edges(); + let slot_idx = num_edges % *N; + let maybe_free_page = self.free_pages[slot_idx..] 
+ .iter() + .cycle() + .take(3) + .filter_map(|lock| lock.try_read()) + .filter_map(|page_id| { + let page = self.segments.get(*page_id)?; + let guard = page.try_head_mut()?; + if page.num_edges() < self.max_page_len() { + Some((page, guard)) + } else { + None + } + }) + .next(); + + if let Some((edge_page, writer)) = maybe_free_page { + EdgeWriter::new(&self.layer_counter, edge_page, writer) + } else { + // not lucky, go wait on your slot + loop { + let mut slot = self.free_pages[slot_idx].write(); + match self.segments.get(*slot).map(|page| (page, page.head_mut())) { + Some((edge_page, writer)) if edge_page.num_edges() < self.max_page_len() => { + return EdgeWriter::new(&self.layer_counter, edge_page, writer); + } + _ => { + *slot = self.push_new_page(); + } + } + } + } + } + + pub fn reserve_new_eid(&self, row: usize) -> EID { + let (segment_id, local_pos) = self.reserve_free_pos(row); + local_pos.as_eid(segment_id, self.max_page_len()) + } + + pub fn reserve_free_pos(&self, row: usize) -> (usize, LocalPOS) { + let slot_idx = row % *N; + let maybe_free_page = { + let lock_slot = self.free_pages[slot_idx].read_recursive(); + let page_id = *lock_slot; + let page = self.segments.get(page_id); + page.and_then(|page| { + self.reserve_page_row(page) + .map(|pos| (page.segment_id(), LocalPOS(pos))) + }) + }; + + if let Some(reserved_pos) = maybe_free_page { + reserved_pos + } else { + // not lucky, go wait on your slot + let mut slot = self.free_pages[slot_idx].write(); + loop { + if let Some(page) = self.segments.get(*slot) + && let Some(pos) = self.reserve_page_row(page) + { + return (page.segment_id(), LocalPOS(pos)); + } + *slot = self.push_new_page(); + } + } + } + + fn reserve_page_row(&self, page: &Arc) -> Option { + // TODO: if this becomes a hotspot, we can switch to a fetch_add followed by a fetch_min + // this means when we read the counter we need to clamp it to max_page_len so the iterators don't break + page.edges_counter() + .fetch_update( + std::sync::atomic::Ordering::Relaxed, + std::sync::atomic::Ordering::Relaxed, + |current| { + if current < self.max_page_len() { + Some(current + 1) + } else { + None + } + }, + ) + .ok() + } + + pub fn par_iter_segments(&self) -> impl ParallelIterator { + (0..self.segments.count()) + .into_par_iter() + .filter_map(|idx| self.segments.get(idx).map(|seg| seg.deref())) + } + + pub fn par_iter(&self, layer: LayerId) -> impl ParallelIterator> + '_ { + self.par_iter_segments().flat_map(move |page| { + (0..page.num_edges()) + .into_par_iter() + .map(LocalPOS) + .filter_map(move |local_edge| { + page.layer_entry(local_edge, layer, Some(page.head())) + }) + }) + } + + pub fn iter(&self, layer: LayerId) -> impl Iterator> + '_ { + (0..self.segments.count()) + .filter_map(move |page_id| self.segments.get(page_id)) + .flat_map(move |page| { + (0..page.num_edges()).filter_map(move |local_edge| { + page.layer_entry(LocalPOS(local_edge), layer, Some(page.head())) + }) + }) + } + + /// Returns an iterator over the segments of the edge store, where each segment is + /// a tuple of the segment index and an iterator over the entries in that segment. 
+ pub fn segmented_par_iter( + &self, + ) -> impl ParallelIterator)> + '_ { + let max_page_len = self.max_page_len(); + (0..self.segments.count()) + .into_par_iter() + .filter_map(move |segment_id| { + self.segments.get(segment_id).map(move |page| { + ( + segment_id, + (0..page.num_edges()).map(move |edge_pos| { + LocalPOS(edge_pos).as_eid(segment_id, max_page_len) + }), + ) + }) + }) + } + + pub fn segment_counts(&self) -> SegmentCounts { + SegmentCounts::new( + self.max_page_len(), + self.pages().iter().map(|(_, seg)| seg.num_edges()), + ) + } + + pub fn flush(&self) -> Result<(), StorageError> { + self.par_iter_segments().try_for_each(|seg| seg.flush()) + } +} diff --git a/db4-storage/src/pages/graph_prop_page/mod.rs b/db4-storage/src/pages/graph_prop_page/mod.rs new file mode 100644 index 0000000000..d3baa81782 --- /dev/null +++ b/db4-storage/src/pages/graph_prop_page/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/db4-storage/src/pages/graph_prop_page/writer.rs b/db4-storage/src/pages/graph_prop_page/writer.rs new file mode 100644 index 0000000000..a6eb8981bf --- /dev/null +++ b/db4-storage/src/pages/graph_prop_page/writer.rs @@ -0,0 +1,60 @@ +use crate::{ + api::graph_props::GraphPropSegmentOps, error::StorageError, + segments::graph_prop::segment::MemGraphPropSegment, wal::LSN, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::properties::prop::AsPropRef; +use raphtory_core::storage::timeindex::AsTime; + +/// Provides mutable access to a graph segment. Holds an exclusive write lock +/// on the in-memory segment for the duration of its lifetime. +pub struct GraphPropWriter<'a, GS: GraphPropSegmentOps> { + pub mem_segment: RwLockWriteGuard<'a, MemGraphPropSegment>, + pub graph_props: &'a GS, +} + +impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { + pub fn new( + graph_props: &'a GS, + mem_segment: RwLockWriteGuard<'a, MemGraphPropSegment>, + ) -> Self { + Self { + mem_segment, + graph_props, + } + } + + pub fn add_properties( + &mut self, + t: T, + props: impl IntoIterator, + ) { + let add = self.mem_segment.add_properties(t, props); + + self.graph_props.increment_est_size(add); + self.graph_props.set_dirty(true); + } + + pub fn update_metadata(&mut self, props: impl IntoIterator) { + let add = self.mem_segment.update_metadata(props); + + self.graph_props.increment_est_size(add); + self.graph_props.set_dirty(true); + } + + pub fn check_metadata(&self, props: &[(usize, P)]) -> Result<(), StorageError> { + self.mem_segment.check_metadata(props) + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.mem_segment.set_lsn(lsn); + } +} + +impl Drop for GraphPropWriter<'_, GS> { + fn drop(&mut self) { + self.graph_props + .notify_write(&mut self.mem_segment) + .expect("Failed to persist node page"); + } +} diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs new file mode 100644 index 0000000000..c91f436dd5 --- /dev/null +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -0,0 +1,89 @@ +use crate::{ + api::graph_props::GraphPropSegmentOps, + error::StorageError, + pages::{ + graph_prop_page::writer::GraphPropWriter, + locked::graph_props::{LockedGraphPropPage, WriteLockedGraphPropPages}, + }, + persist::strategy::PersistenceStrategy, +}; +use raphtory_api::core::entities::properties::meta::Meta; +use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; + +/// Backing store for graph temporal properties and graph metadata. 
+#[derive(Debug)] +pub struct GraphPropStorageInner { + /// The graph props segment that contains all graph properties and graph metadata. + /// Unlike node and edge segments, which are split into multiple segments, + /// there is always only one graph props segment. + page: Arc, + + /// Stores graph prop metadata (prop name -> prop id mappings). + meta: Arc, + + path: Option, + + ext: EXT, +} + +impl, EXT: PersistenceStrategy> + GraphPropStorageInner +{ + pub fn new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { + let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); + + Self { + page, + path: path.map(|p| p.to_path_buf()), + meta, + ext, + } + } + + pub fn load(path: impl AsRef, ext: EXT) -> Result { + let graph_props_meta = Arc::new(Meta::new_for_graph_props()); + + Ok(Self { + page: Arc::new(GS::load( + graph_props_meta.clone(), + path.as_ref(), + ext.clone(), + )?), + path: Some(path.as_ref().to_path_buf()), + meta: graph_props_meta, + ext, + }) + } + + pub fn meta(&self) -> &Arc { + &self.meta + } + + pub fn graph_entry(&self) -> GS::Entry<'_> { + self.page.entry() + } + + pub fn segment(&self) -> &Arc { + &self.page + } + + pub fn writer(&self) -> GraphPropWriter<'_, GS> { + let head = self.page.head_mut(); + let graph_props = &self.page; + GraphPropWriter::new(graph_props, head) + } + + pub fn write_locked<'a>(&'a self) -> WriteLockedGraphPropPages<'a, GS> { + WriteLockedGraphPropPages::new(LockedGraphPropPage::new( + self.page.as_ref(), + self.page.head_mut(), + )) + } + + pub fn flush(&self) -> Result<(), StorageError> { + self.page.flush() + } +} diff --git a/db4-storage/src/pages/layer_counter.rs b/db4-storage/src/pages/layer_counter.rs new file mode 100644 index 0000000000..e94f8567b7 --- /dev/null +++ b/db4-storage/src/pages/layer_counter.rs @@ -0,0 +1,119 @@ +use raphtory_api::core::entities::LayerId; +use raphtory_core::entities::graph::timer::{MaxCounter, MinCounter, TimeCounterTrait}; +use std::sync::atomic::AtomicUsize; + +#[derive(Debug)] +pub struct GraphStats { + layers: boxcar::Vec, + earliest: MinCounter, + latest: MaxCounter, +} + +impl> From for GraphStats { + fn from(iter: I) -> Self { + let layers = iter.into_iter().map(AtomicUsize::new).collect(); + Self { + layers, + earliest: MinCounter::new(), + latest: MaxCounter::new(), + } + } +} + +impl Default for GraphStats { + fn default() -> Self { + Self::new() + } +} + +impl GraphStats { + pub fn new() -> Self { + let layers = boxcar::Vec::new(); + layers.push_with(|_| Default::default()); + Self { + layers, + earliest: MinCounter::new(), + latest: MaxCounter::new(), + } + } + + pub fn load(counts: impl IntoIterator, earliest: i64, latest: i64) -> Self { + let layers = counts.into_iter().map(AtomicUsize::new).collect(); + let earliest = MinCounter::from(earliest); + let latest = MaxCounter::from(latest); + Self { + layers, + earliest, + latest, + } + } + + pub fn len(&self) -> usize { + self.layers.count() + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn update_time(&self, t: i64) { + self.earliest.update(t); + self.latest.update(t); + } + + pub fn earliest(&self) -> i64 { + self.earliest.get() + } + + pub fn latest(&self) -> i64 { + self.latest.get() + } + + pub fn increment(&self, layer_id: LayerId) -> usize { + let counter = self.get_or_create_layer(layer_id); + counter.fetch_add(1, std::sync::atomic::Ordering::Release) + } + + pub fn get(&self, layer_id: LayerId) -> usize { + let counter = self.get_or_create_layer(layer_id); + 
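// Acquire pairs with the Release ordering used by increment() above, so a
// reader that observes an updated count also observes the writes made by
// the incrementing thread before its fetch_add.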
counter.load(std::sync::atomic::Ordering::Acquire) + } + + pub fn get_counter(&self, layer_id: LayerId) -> &AtomicUsize { + self.get_or_create_layer(layer_id) + } + + fn get_or_create_layer(&self, layer_id: LayerId) -> &AtomicUsize { + let layer_id = layer_id.0; + if let Some(counter) = self.layers.get(layer_id) { + return counter; + } + + if self.layers.count() > layer_id { + // something has allocated the layer, wait for it to be added + loop { + if let Some(counter) = self.layers.get(layer_id) { + return counter; + } else { + // wait for the layer to be created + std::thread::yield_now(); + } + } + } else { + loop { + let new_layer_id = self.layers.push_with(|_| Default::default()); + if new_layer_id >= layer_id { + loop { + if let Some(counter) = self.layers.get(layer_id) { + return counter; + } else { + // wait for the layer to be created + std::thread::yield_now(); + } + } + } + } + } + } +} diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs new file mode 100644 index 0000000000..b365df5ce1 --- /dev/null +++ b/db4-storage/src/pages/locked/edges.rs @@ -0,0 +1,141 @@ +use std::ops::{Deref, DerefMut}; + +use crate::{ + LocalPOS, + api::edges::EdgeSegmentOps, + error::StorageError, + pages::{edge_page::writer::EdgeWriter, layer_counter::GraphStats, resolve_pos}, + persist::strategy::PersistenceStrategy, + segments::edge::segment::MemEdgeSegment, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::LayerId; +use raphtory_core::entities::{EID, ELID}; +use rayon::prelude::*; + +#[derive(Debug)] +pub struct LockedEdgePage<'a, ES> { + page_id: usize, + max_page_len: u32, + page: &'a ES, + num_edges: &'a GraphStats, + lock: RwLockWriteGuard<'a, MemEdgeSegment>, +} + +impl<'a, ES: EdgeSegmentOps> LockedEdgePage<'a, ES> { + pub fn new( + page_id: usize, + max_page_len: u32, + page: &'a ES, + num_edges: &'a GraphStats, + lock: RwLockWriteGuard<'a, MemEdgeSegment>, + ) -> Self { + Self { + page_id, + max_page_len, + page, + num_edges, + lock, + } + } + + #[inline(always)] + pub fn writer(&mut self) -> EdgeWriter<'_, &mut MemEdgeSegment, ES> { + EdgeWriter::new(self.num_edges, self.page, self.lock.deref_mut()) + } + + #[inline(always)] + pub fn page_id(&self) -> usize { + self.page_id + } + + #[inline(always)] + pub fn resolve_pos(&self, edge_id: EID) -> Option { + let (page, pos) = resolve_pos(edge_id, self.max_page_len); + + if page == self.page_id { + Some(pos) + } else { + None + } + } + + pub fn ensure_layer(&mut self, layer_id: LayerId) { + self.lock.get_or_create_layer(layer_id); + } + + pub fn page(&self) -> &ES { + &self.page + } +} +#[derive(Debug)] +pub struct WriteLockedEdgePages<'a, ES> { + writers: Vec>, +} + +impl Default for WriteLockedEdgePages<'_, ES> { + fn default() -> Self { + Self { + writers: Vec::new(), + } + } +} + +impl<'a, EXT: PersistenceStrategy, ES: EdgeSegmentOps> + WriteLockedEdgePages<'a, ES> +{ + pub fn new(writers: Vec>) -> Self { + Self { writers } + } + + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { + self.writers.get_mut(segment_id) + } + + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedEdgePage<'a, ES>> { + self.writers.par_iter_mut() + } + + pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, LockedEdgePage<'a, ES>> { + self.writers.iter_mut() + } + + pub fn into_par_iter(self) -> impl ParallelIterator> + 'a { + self.writers.into_par_iter() + } + + pub fn ensure_layer(&mut self, layer_id: LayerId) { + for writer in &mut 
self.writers { + writer.ensure_layer(layer_id); + } + } + + pub fn exists(&self, elid: ELID) -> bool { + let max_page_len = if !self.writers.is_empty() { + self.writers[0].max_page_len + } else { + return false; + }; + let (page_id, pos) = resolve_pos(elid.edge, max_page_len); + self.writers.get(page_id).is_some_and(|page| { + let locked_head = page.lock.deref(); + page.page.has_edge(pos, elid.layer(), locked_head) + }) + } + + pub fn vacuum(&mut self) -> Result<(), StorageError> { + for LockedEdgePage { page, lock, .. } in &mut self.writers { + page.vacuum(lock.deref_mut())?; + } + Ok(()) + } + + pub fn len(&self) -> usize { + self.writers.len() + } + + pub fn is_empty(&self) -> bool { + self.writers.is_empty() + } +} diff --git a/db4-storage/src/pages/locked/graph_props.rs b/db4-storage/src/pages/locked/graph_props.rs new file mode 100644 index 0000000000..319835ab22 --- /dev/null +++ b/db4-storage/src/pages/locked/graph_props.rs @@ -0,0 +1,73 @@ +use crate::{ + api::graph_props::GraphPropSegmentOps, segments::graph_prop::segment::MemGraphPropSegment, + wal::LSN, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_core::storage::timeindex::AsTime; + +pub struct LockedGraphPropPage<'a, GS: GraphPropSegmentOps> { + page: &'a GS, + lock: RwLockWriteGuard<'a, MemGraphPropSegment>, +} + +impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { + pub fn new(page: &'a GS, lock: RwLockWriteGuard<'a, MemGraphPropSegment>) -> Self { + Self { page, lock } + } + + pub fn segment(&self) -> &GS { + self.page + } + + /// Add temporal properties to the graph + pub fn add_properties( + &mut self, + t: T, + props: impl IntoIterator, + ) { + let add = self.lock.add_properties(t, props); + + self.page.increment_est_size(add); + self.page.set_dirty(true); + } + + /// Add metadata (constant properties) to the graph + pub fn add_metadata(&mut self, props: impl IntoIterator) { + self.update_metadata(props); + } + + /// Update metadata (constant properties) on the graph + pub fn update_metadata(&mut self, props: impl IntoIterator) { + let add = self.lock.update_metadata(props); + + self.page.increment_est_size(add); + self.page.set_dirty(true); + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.lock.set_lsn(lsn); + } +} + +impl Drop for LockedGraphPropPage<'_, GS> { + fn drop(&mut self) { + self.page + .notify_write(&mut self.lock) + .expect("Failed to persist graph props page"); + } +} + +pub struct WriteLockedGraphPropPages<'a, GS: GraphPropSegmentOps> { + writer: LockedGraphPropPage<'a, GS>, +} + +impl<'a, GS: GraphPropSegmentOps> WriteLockedGraphPropPages<'a, GS> { + pub fn new(writer: LockedGraphPropPage<'a, GS>) -> Self { + Self { writer } + } + + pub fn writer(&mut self) -> &mut LockedGraphPropPage<'a, GS> { + &mut self.writer + } +} diff --git a/db4-storage/src/pages/locked/mod.rs b/db4-storage/src/pages/locked/mod.rs new file mode 100644 index 0000000000..de88345004 --- /dev/null +++ b/db4-storage/src/pages/locked/mod.rs @@ -0,0 +1,3 @@ +pub mod edges; +pub mod graph_props; +pub mod nodes; diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs new file mode 100644 index 0000000000..5f46be2042 --- /dev/null +++ b/db4-storage/src/pages/locked/nodes.rs @@ -0,0 +1,132 @@ +use crate::{ + LocalPOS, + api::nodes::NodeSegmentOps, + error::StorageError, + pages::{layer_counter::GraphStats, node_page::writer::NodeWriter, resolve_pos}, + persist::strategy::PersistenceStrategy, + 
segments::node::segment::MemNodeSegment, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::LayerId; +use raphtory_core::entities::VID; +use rayon::prelude::*; +use std::ops::DerefMut; + +#[derive(Debug)] +pub struct LockedNodePage<'a, NS> { + segment_id: usize, + max_page_len: u32, + layer_counter: &'a GraphStats, + page: &'a NS, + lock: RwLockWriteGuard<'a, MemNodeSegment>, +} + +impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { + pub fn new( + segment_id: usize, + layer_counter: &'a GraphStats, + max_page_len: u32, + page: &'a NS, + lock: RwLockWriteGuard<'a, MemNodeSegment>, + ) -> Self { + Self { + segment_id, + layer_counter, + max_page_len, + page, + lock, + } + } + + pub fn segment(&self) -> &NS { + self.page + } + + #[inline(always)] + pub fn writer(&mut self) -> NodeWriter<'_, &mut MemNodeSegment, NS> { + NodeWriter::new(self.page, self.layer_counter, self.lock.deref_mut()) + } + + pub fn head(&mut self) -> &mut MemNodeSegment { + self.lock.deref_mut() + } + + pub fn vacuum(&mut self) { + let _ = self.page.vacuum(self.lock.deref_mut()); + } + + #[inline(always)] + pub fn segment_id(&self) -> usize { + self.segment_id + } + + #[inline(always)] + pub fn resolve_pos(&self, node_id: VID) -> Option { + let (page, pos) = resolve_pos(node_id, self.max_page_len); + + if page == self.segment_id { + Some(pos) + } else { + None + } + } + + pub fn ensure_layer(&mut self, layer_id: LayerId) { + self.lock.get_or_create_layer(layer_id); + self.layer_counter.get(layer_id); + } +} + +pub struct WriteLockedNodePages<'a, NS> { + writers: Vec>, +} + +impl Default for WriteLockedNodePages<'_, NS> { + fn default() -> Self { + Self { + writers: Vec::new(), + } + } +} + +impl<'a, EXT: PersistenceStrategy, NS: NodeSegmentOps> + WriteLockedNodePages<'a, NS> +{ + pub fn new(writers: Vec>) -> Self { + Self { writers } + } + + pub fn len(&self) -> usize { + self.writers.len() + } + + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedNodePage<'a, NS>> { + self.writers.get_mut(segment_id) + } + + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedNodePage<'a, NS>> { + self.writers.par_iter_mut() + } + + pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, LockedNodePage<'a, NS>> { + self.writers.iter_mut() + } + + pub fn into_par_iter(self) -> impl ParallelIterator> + 'a { + self.writers.into_par_iter() + } + + pub fn ensure_layer(&mut self, layer_id: LayerId) { + for writer in &mut self.writers { + writer.ensure_layer(layer_id); + } + } + + pub fn vacuum(&mut self) -> Result<(), StorageError> { + for LockedNodePage { page, lock, .. 
} in &mut self.writers { + page.vacuum(lock.deref_mut())?; + } + Ok(()) + } +} diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs new file mode 100644 index 0000000000..dcc1c07b6f --- /dev/null +++ b/db4-storage/src/pages/mod.rs @@ -0,0 +1,454 @@ +use crate::{ + EID, LocalPOS, VID, + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, + persist::{ + config::ConfigOps, + control_file::{ControlFileOps, DBState}, + strategy::PersistenceStrategy, + }, + properties::props_meta_writer::PropsMetaWriter, + segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + state::StateIndex, + wal::{GraphWalOps, WalOps}, +}; +use edge_page::writer::EdgeWriter; +use edge_store::EdgeStorageInner; +use graph_prop_store::GraphPropStorageInner; +use node_page::writer::NodeWriter; +use node_store::NodeStorageInner; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::properties::meta::Meta; +use rayon::prelude::*; +use std::{ + path::{Path, PathBuf}, + sync::{ + Arc, + atomic::{self, AtomicUsize}, + }, +}; +use tinyvec::TinyVec; + +pub mod edge_page; +pub mod edge_store; +pub mod graph_prop_page; +pub mod graph_prop_store; +pub mod layer_counter; +pub mod locked; +pub mod node_page; +pub mod node_store; +pub mod session; + +#[cfg(any(test, feature = "test-utils"))] +pub mod test_utils; + +// graph // (node/edges) // segment // layer_ids (0, 1, 2, ...) // actual graphy bits + +#[derive(Debug)] +pub struct GraphStore< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> { + nodes: Arc>, + edges: Arc>, + graph_props: Arc>, + graph_dir: Option, + event_id: AtomicUsize, + ext: EXT, +} + +impl< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> GraphStore +{ + pub fn flush(&self) -> Result<(), StorageError> { + let node_types = self.nodes.prop_meta().get_all_node_types(); + let config = self.ext.config().with_node_types(node_types); + + if let Some(graph_dir) = self.graph_dir.as_ref() { + config.save_to_dir(graph_dir)?; + } + + self.nodes.flush()?; + self.edges.flush()?; + self.graph_props.flush()?; + + Ok(()) + } +} + +#[derive(Debug)] +pub struct ReadLockedGraphStore< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> { + pub nodes: Arc>, + pub edges: Arc>, + pub graph: Arc>, +} + +impl< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> GraphStore +{ + pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); + + Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) + } + + pub fn new_with_meta( + graph_dir: Option<&Path>, + node_meta: Meta, + edge_meta: Meta, + graph_props_meta: Meta, + ext: EXT, + ) -> Self { + let nodes_path = graph_dir.map(|graph_dir| graph_dir.join("nodes")); + let edges_path = graph_dir.map(|graph_dir| graph_dir.join("edges")); + let graph_props_path = graph_dir.map(|graph_dir| graph_dir.join("graph_props")); + + let node_meta = Arc::new(node_meta); + let edge_meta = Arc::new(edge_meta); + let graph_props_meta = Arc::new(graph_props_meta); + + let node_storage = Arc::new(NodeStorageInner::new_with_meta( + nodes_path, + node_meta, + 
edge_meta.clone(), + ext.clone(), + )); + let edge_storage = Arc::new(EdgeStorageInner::new_with_meta( + edges_path, + edge_meta, + ext.clone(), + )); + let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( + graph_props_path.as_deref(), + graph_props_meta, + ext.clone(), + )); + + if let Some(graph_dir) = graph_dir { + ext.config() + .save_to_dir(graph_dir) + .expect("Failed to write config to disk"); + } + + Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(0), + graph_dir: graph_dir.map(|p| p.to_path_buf()), + ext, + } + } + + pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result { + let nodes_path = graph_dir.as_ref().join("nodes"); + let edges_path = graph_dir.as_ref().join("edges"); + let graph_props_path = graph_dir.as_ref().join("graph_props"); + + let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); + let edge_meta = edge_storage.edge_meta().clone(); + let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); + let node_meta = node_storage.prop_meta(); + + // Load graph temporal properties and metadata. + let graph_prop_storage = + Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); + + for node_type in ext.config().node_types().iter() { + node_meta.get_or_create_node_type_id(node_type); + } + + let t_len = edge_storage.t_len(); + + Ok(Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(t_len), + graph_dir: Some(graph_dir.as_ref().to_path_buf()), + ext, + }) + } + + pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { + let nodes = self.nodes.locked().into(); + let edges = self.edges.locked().into(); + + ReadLockedGraphStore { + nodes, + edges, + graph: self.clone(), + } + } + + pub fn extension(&self) -> &EXT { + &self.ext + } + + pub fn nodes(&self) -> &Arc> { + &self.nodes + } + + pub fn edges(&self) -> &Arc> { + &self.edges + } + + pub fn graph_props(&self) -> &Arc> { + &self.graph_props + } + + pub fn edge_meta(&self) -> &Meta { + self.edges.edge_meta() + } + + pub fn node_meta(&self) -> &Meta { + self.nodes.prop_meta() + } + + pub fn graph_props_meta(&self) -> &Meta { + self.graph_props.meta() + } + + pub fn earliest(&self) -> i64 { + self.nodes + .stats() + .earliest() + .min(self.edges.stats().earliest()) + } + + pub fn latest(&self) -> i64 { + self.nodes.stats().latest().max(self.edges.stats().latest()) + } + + pub fn node_segment_counts(&self) -> SegmentCounts { + self.nodes.segment_counts() + } + + pub fn edge_segment_counts(&self) -> SegmentCounts { + self.edges.segment_counts() + } + + pub fn read_event_id(&self) -> usize { + self.event_id.load(atomic::Ordering::Relaxed) + } + + pub fn set_event_id(&self, event_id: usize) { + self.event_id.store(event_id, atomic::Ordering::Relaxed); + } + + pub fn next_event_id(&self) -> usize { + self.event_id.fetch_add(1, atomic::Ordering::Relaxed) + } + + pub fn reserve_event_ids(&self, num_ids: usize) -> usize { + self.event_id.fetch_add(num_ids, atomic::Ordering::Relaxed) + } + + pub fn set_max_event_id(&self, value: usize) -> usize { + self.event_id.fetch_max(value, atomic::Ordering::Relaxed) + } + + pub fn node_writer( + &self, + node_segment: usize, + ) -> NodeWriter<'_, RwLockWriteGuard<'_, MemNodeSegment>, NS> { + self.nodes().writer(node_segment) + } + + pub fn edge_writer( + &self, + eid: EID, + ) -> EdgeWriter<'_, RwLockWriteGuard<'_, MemEdgeSegment>, ES> { + self.edges().get_writer(eid) + } + 
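// Sketch of the single-edge write path built from these accessors
// (hypothetical `row` index; the EdgeWriter holds the segment's head lock
// and flushes via notify_write when dropped):
//
//     let eid = store.edges().reserve_new_eid(row); // row only picks a free-page slot
//     let mut writer = store.edge_writer(eid);
//     // ... writer.add_edge(..) / writer.update_c_props(..), then drop(writer)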
+ pub fn get_free_writer(&self) -> EdgeWriter<'_, RwLockWriteGuard<'_, MemEdgeSegment>, ES> { + self.edges().get_free_writer() + } + + pub fn vacuum(self: &Arc) -> Result<(), StorageError> { + let mut locked_nodes = self.nodes.write_locked(); + let mut locked_edges = self.edges.write_locked(); + + locked_nodes.vacuum()?; + locked_edges.vacuum()?; + + Ok(()) + } +} + +#[derive(Debug)] +pub struct SegmentCounts { + max_seg_len: u32, + counts: TinyVec<[u32; 32]>, // this might come to be a problem + _marker: std::marker::PhantomData, +} + +impl + Into> SegmentCounts { + pub fn new(max_seg_len: u32, counts: impl IntoIterator) -> Self { + let counts: TinyVec<[u32; 32]> = counts.into_iter().collect(); + + Self { + max_seg_len, + counts, + _marker: std::marker::PhantomData, + } + } + + pub fn into_iter(self) -> impl Iterator { + let max_seg_len = self.max_seg_len as usize; + self.counts.into_iter().enumerate().flat_map(move |(i, c)| { + let g_pos = i * max_seg_len as usize; + (0..c).map(move |offset| I::from(g_pos + offset as usize)) + }) + } + + pub fn into_index(self) -> StateIndex { + StateIndex::from(self) + } + + pub fn counts(&self) -> &[u32] { + &self.counts + } + + pub(crate) fn max_seg_len(&self) -> u32 { + self.max_seg_len + } +} +impl + Send> SegmentCounts { + pub fn into_par_iter(self) -> impl ParallelIterator { + let max_seg_len = self.max_seg_len as usize; + (0..self.counts.len()).into_par_iter().flat_map(move |i| { + let c = self.counts[i]; + let g_pos = i * max_seg_len; + (0..c) + .into_par_iter() + .map(move |offset| I::from(g_pos + offset as usize)) + }) + } +} + +impl< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> Drop for GraphStore +{ + fn drop(&mut self) { + let wal = self.ext.wal(); + let control_file = self.ext.control_file(); + + match self.flush() { + Ok(_) => { + // Log a checkpoint record in the WAL, indicating that the DB was shutdown + // with all the segments flushed to disk. + // On startup, recovery is skipped since there are no pending writes to replay. + let checkpoint_lsn = match wal.log_shutdown_checkpoint() { + Ok(lsn) => lsn, + Err(err) => { + eprintln!("Failed to log shutdown checkpoint in drop: {err}"); + return; + } + }; + + // Flush up to the end of the WAL stream. + let flush_lsn = wal.position(); + + if let Err(err) = wal.flush(flush_lsn) { + eprintln!("Failed to flush checkpoint record in drop: {err}"); + return; + } + + // Record the checkpoint and shutdown state and write control file to disk. 
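// Ordering matters for crash consistency: the checkpoint record is made
// durable in the WAL above before the control file advertises its LSN
// below, so recovery can never be pointed at a checkpoint that was lost
// in a crash.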
+ control_file.set_checkpoint(checkpoint_lsn); + control_file.set_db_state(DBState::Shutdown); + + if let Err(err) = control_file.save() { + eprintln!("Failed to save control file in drop: {err}"); + return; + } + } + Err(err) => { + eprintln!("Failed to flush storage in drop: {err}") + } + } + } +} + +#[inline(always)] +pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, LocalPOS) { + let i = i.into(); + let seg = i / max_page_len as usize; + let pos = i % max_page_len as usize; + (seg, LocalPOS(pos as u32)) +} + +pub fn row_group_par_iter>( + chunk_size: usize, + num_segments: usize, + max_seg_len: u32, + max_actual_seg_len: u32, +) -> impl IndexedParallelIterator)> { + let (num_chunks, chunk_size) = if num_segments != 0 { + let chunk_size = (chunk_size / num_segments).max(1); + let num_chunks = (max_seg_len as usize + chunk_size - 1) / chunk_size; + (num_chunks, chunk_size) + } else { + (0, 0) + }; + + (0..num_chunks).into_par_iter().map(move |chunk_id| { + let start = chunk_id * chunk_size; + let end = ((chunk_id + 1) * chunk_size).min(max_actual_seg_len as usize); + + let iter = (start..end).flat_map(move |x| { + (0..num_segments).map(move |seg| I::from(seg * max_seg_len as usize + x)) + }); + + (chunk_id, iter) + }) +} + +#[cfg(test)] +mod test { + use rayon::iter::ParallelIterator; + + #[test] + fn test_iterleave() { + let chunk_size = 3; + let num_segments = 3; + let max_seg_len = 4; + + let actual = super::row_group_par_iter(chunk_size, num_segments, max_seg_len, max_seg_len) + .map(|(c, items)| (c, items.collect::>())) + .collect::>(); + + let expected = vec![ + (0, vec![0, 4, 8]), + (1, vec![1, 5, 9]), + (2, vec![2, 6, 10]), + (3, vec![3, 7, 11]), + ]; + + assert_eq!(actual, expected); + } +} diff --git a/db4-storage/src/pages/node_page/mod.rs b/db4-storage/src/pages/node_page/mod.rs new file mode 100644 index 0000000000..d3baa81782 --- /dev/null +++ b/db4-storage/src/pages/node_page/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs new file mode 100644 index 0000000000..ccc81acc3a --- /dev/null +++ b/db4-storage/src/pages/node_page/writer.rs @@ -0,0 +1,300 @@ +use crate::{ + LocalPOS, + api::nodes::NodeSegmentOps, + error::StorageError, + pages::{layer_counter::GraphStats, resolve_pos}, + segments::node::segment::MemNodeSegment, + wal::LSN, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::entities::{ + EID, GID, LayerId, VID, + properties::{ + meta::{NODE_ID_IDX, NODE_TYPE_IDX, STATIC_GRAPH_LAYER_ID}, + prop::{AsPropRef, Prop}, + }, +}; +use raphtory_core::{ + entities::{ELID, GidRef}, + storage::timeindex::AsTime, +}; +use std::ops::DerefMut; + +#[derive(Debug)] +pub struct NodeWriter<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> { + pub page: &'a NS, + pub mut_segment: MP, + pub l_counter: &'a GraphStats, + pub old_est_size: usize, +} + +impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWriter<'a, MP, NS> { + pub fn new(page: &'a NS, global_num_nodes: &'a GraphStats, writer: MP) -> Self { + let old_est_size = writer.est_size(); + Self { + page, + mut_segment: writer, + l_counter: global_num_nodes, + old_est_size, + } + } + #[inline(always)] + pub fn resolve_pos(&self, node_id: VID) -> Option { + let (page, pos) = resolve_pos(node_id, self.mut_segment.max_page_len()); + + if page == self.mut_segment.segment_id() { + Some(pos) + } else { + None + } + } + + pub fn add_outbound_edge( + &mut self, + t: Option, + src_pos: impl Into, + dst: impl Into, + e_id: impl Into, + ) { + 
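// `t` is optional: temporal updates pass Some(t), which also advances the
// graph's earliest/latest counters, while purely structural edges go
// through add_static_outbound_edge below with t = None and the static
// graph layer id.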
self.add_outbound_edge_inner(t, src_pos, dst, e_id); + } + + pub fn add_static_outbound_edge( + &mut self, + src_pos: LocalPOS, + dst: impl Into, + e_id: impl Into, + ) { + let e_id = e_id.into(); + self.add_outbound_edge_inner::( + None, + src_pos, + dst, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); + } + + fn add_outbound_edge_inner( + &mut self, + t: Option, + src_pos: impl Into, + dst: impl Into, + e_id: impl Into, + ) { + let src_pos = src_pos.into(); + let dst = dst.into(); + if let Some(t) = t { + self.l_counter.update_time(t.t()); + } + + let e_id = e_id.into(); + let layer_id = e_id.layer(); + let (is_new_node, add) = self.mut_segment.add_outbound_edge(t, src_pos, dst, e_id); + self.mut_segment.increment_est_size(add); + + if is_new_node && !self.page.has_node(src_pos, layer_id) { + self.l_counter.increment(layer_id); + } + } + + pub fn add_inbound_edge( + &mut self, + t: Option, + dst_pos: impl Into, + src: impl Into, + e_id: impl Into, + ) { + self.add_inbound_edge_inner(t, dst_pos, src, e_id); + } + + pub fn add_static_inbound_edge( + &mut self, + dst_pos: LocalPOS, + src: impl Into, + e_id: impl Into, + ) { + let e_id = e_id.into(); + self.add_inbound_edge_inner::( + None, + dst_pos, + src, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); + } + + fn add_inbound_edge_inner( + &mut self, + t: Option, + dst_pos: impl Into, + src: impl Into, + e_id: impl Into, + ) { + let e_id = e_id.into(); + let src = src.into(); + if let Some(t) = t { + self.l_counter.update_time(t.t()); + } + let layer = e_id.layer(); + let dst_pos = dst_pos.into(); + let (is_new_node, add) = self.mut_segment.add_inbound_edge(t, dst_pos, src, e_id); + + self.mut_segment.increment_est_size(add); + + if is_new_node && !self.page.has_node(dst_pos, layer) { + self.l_counter.increment(layer); + } + } + + pub fn add_props( + &mut self, + t: T, + pos: LocalPOS, + layer_id: LayerId, + props: impl IntoIterator, + ) { + self.l_counter.update_time(t.t()); + let (is_new_node, add) = self.mut_segment.add_props(t, pos, layer_id, props); + self.mut_segment.increment_est_size(add); + if is_new_node && !self.page.has_node(pos, layer_id) { + self.l_counter.increment(layer_id); + } + } + + pub fn check_metadata( + &self, + pos: LocalPOS, + layer_id: LayerId, + props: &[(usize, P)], + ) -> Result<(), StorageError> { + self.mut_segment.check_metadata(pos, layer_id, props) + } + + pub fn update_c_props( + &mut self, + pos: LocalPOS, + layer_id: LayerId, + props: impl IntoIterator, + ) { + let (is_new_node, add) = self.mut_segment.update_metadata(pos, layer_id, props); + self.mut_segment.increment_est_size(add); + if is_new_node && !self.page.has_node(pos, layer_id) { + self.l_counter.increment(layer_id); + } + } + + pub fn get_metadata(&self, pos: LocalPOS, layer_id: LayerId, prop_id: usize) -> Option { + self.mut_segment.get_metadata(pos, layer_id, prop_id) + } + + pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID) { + self.l_counter.update_time(t.t()); + let add = self.mut_segment.update_timestamp(t, pos, e_id); + self.mut_segment.increment_est_size(add); + } + + #[inline] + pub fn get_out_edge(&self, pos: LocalPOS, dst: VID, layer_id: LayerId) -> Option { + self.page + .get_out_edge(pos, dst, layer_id, self.mut_segment.deref()) + } + + pub fn get_inb_edge(&self, pos: LocalPOS, src: VID, layer_id: LayerId) -> Option { + self.page + .get_inb_edge(pos, src, layer_id, self.mut_segment.deref()) + } + + pub fn store_node_id_and_node_type( + &mut self, + pos: LocalPOS, + layer_id: LayerId, + gid: GidRef<'_>, + 
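+        // note: a `node_type` of 0 denotes the default type and is dropped below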
node_type: usize, + ) { + let node_type = (node_type != 0).then_some(node_type); + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type)); + } + + pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: LayerId, gid: GID) { + let gid = match gid { + GID::U64(id) => Prop::U64(id), + GID::Str(s) => Prop::str(s), + }; + let props = [(NODE_ID_IDX, gid)]; + self.update_c_props(pos, layer_id, props); + } + + pub fn store_node_type(&mut self, pos: LocalPOS, layer_id: LayerId, node_type: usize) { + let props = [(NODE_TYPE_IDX, Prop::U64(node_type as u64))]; + self.update_c_props(pos, layer_id, props); + } + + pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID) { + self.update_timestamp(t, node, e_id); + } + + pub fn increment_seg_num_nodes(&mut self) { + self.page + .increment_num_nodes(self.mut_segment.max_page_len()); + } + + pub fn has_node(&self, node: LocalPOS, layer_id: LayerId) -> bool { + self.mut_segment.has_node(node, layer_id) || self.page.has_node(node, layer_id) + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.mut_segment.set_lsn(lsn); + } +} + +impl<'a, NS: NodeSegmentOps> NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + pub fn unlocked(&mut self, op: impl FnOnce() -> R) -> R { + RwLockWriteGuard::unlocked(&mut self.mut_segment, op) + } +} + +pub fn node_info_as_props( + gid: Option, + node_type: Option, +) -> impl Iterator { + gid.into_iter().map(|g| (NODE_ID_IDX, g.into())).chain( + node_type + .into_iter() + .map(|nt| (NODE_TYPE_IDX, Prop::U64(nt as u64))), + ) +} + +impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> Drop + for NodeWriter<'a, MP, NS> +{ + fn drop(&mut self) { + self.mut_segment + .increment_global_est_size(self.mut_segment.est_size() - self.old_est_size); + self.page + .notify_write(self.mut_segment.deref_mut()) + .expect("Failed to persist node page"); + } +} + +/// Holds writers for src and dst node segments when adding an edge. +/// If both nodes are in the same segment, `dst` is `None` and `src` is used for both. 
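+/// Callers should go through `get_mut_src`/`get_mut_dst`; when `dst` is
+/// `None`, `get_mut_dst` falls back to the `src` writer, so the same-segment
+/// case needs no special handling at call sites.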
+pub struct NodeWriters<'a, MP: DerefMut<Target = MemNodeSegment>, NS: NodeSegmentOps> {
+    pub src: NodeWriter<'a, MP, NS>,
+    pub dst: Option<NodeWriter<'a, MP, NS>>,
+}
+
+impl<'a, MP: DerefMut<Target = MemNodeSegment>, NS: NodeSegmentOps> NodeWriters<'a, MP, NS> {
+    pub fn get_mut_src(&mut self) -> &mut NodeWriter<'a, MP, NS> {
+        &mut self.src
+    }
+
+    pub fn get_mut_dst(&mut self) -> &mut NodeWriter<'a, MP, NS> {
+        self.dst.as_mut().unwrap_or(&mut self.src)
+    }
+
+    pub fn set_lsn(&mut self, lsn: LSN) {
+        self.src.set_lsn(lsn);
+        if let Some(dst) = &mut self.dst {
+            dst.set_lsn(lsn);
+        }
+    }
+}
diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs
new file mode 100644
index 0000000000..b28420d1b3
--- /dev/null
+++ b/db4-storage/src/pages/node_store.rs
@@ -0,0 +1,647 @@
+use super::{node_page::writer::NodeWriter, resolve_pos};
+use crate::{
+    LocalPOS,
+    api::nodes::{LockedNSSegment, NodeSegmentOps},
+    error::StorageError,
+    pages::{
+        SegmentCounts,
+        layer_counter::GraphStats,
+        locked::nodes::{LockedNodePage, WriteLockedNodePages},
+        row_group_par_iter,
+    },
+    persist::{config::ConfigOps, strategy::PersistenceStrategy},
+    segments::node::segment::MemNodeSegment,
+};
+use parking_lot::{RwLock, RwLockWriteGuard};
+use raphtory_api::core::entities::{GidType, LayerId, properties::meta::Meta};
+use raphtory_core::{
+    entities::{EID, VID},
+    storage::timeindex::AsTime,
+};
+use rayon::prelude::*;
+use std::{
+    collections::HashMap,
+    ops::Deref,
+    path::{Path, PathBuf},
+    sync::{Arc, LazyLock, atomic::AtomicU32},
+};
+
+// graph // (nodes|edges) // graph segments // layers // chunks
+pub static N: LazyLock<usize> = LazyLock::new(|| rayon::current_num_threads());
+
+#[derive(Debug)]
+pub struct NodeStorageInner<NS, EXT> {
+    segments: boxcar::Vec<Arc<NS>>,
+    stats: Arc<GraphStats>,
+
+    /// Contains ids of segments that can accommodate new nodes.
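+    /// One slot per worker thread (see `N`); a writer picks slot `row % N`,
+    /// so concurrent inserts tend to land on different segments.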
+ free_segments: Box<[RwLock]>, + + nodes_path: Option, + node_meta: Arc, + edge_meta: Arc, + ext: EXT, +} + +#[derive(Debug)] +pub struct ReadLockedNodeStorage, EXT> { + storage: Arc>, + locked_segments: Box<[NS::ArcLockedSegment]>, +} + +impl, EXT: PersistenceStrategy> + ReadLockedNodeStorage +{ + pub fn node_ref( + &self, + node: impl Into, + ) -> <::ArcLockedSegment as LockedNSSegment>::EntryRef<'_> { + let (segment_id, pos) = self.storage.resolve_pos(node); + let locked_segment = &self.locked_segments[segment_id]; + locked_segment.entry_ref(pos) + } + + pub fn try_node_ref( + &self, + node: VID, + ) -> Option<<::ArcLockedSegment as LockedNSSegment>::EntryRef<'_>> { + let (segment_id, pos) = self.storage.resolve_pos(node); + let locked_segment = &self.locked_segments.get(segment_id)?; + if pos.0 < locked_segment.num_nodes() { + Some(locked_segment.entry_ref(pos)) + } else { + None + } + } + + pub fn len(&self) -> usize { + self.storage.num_nodes() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn iter( + &self, + ) -> impl Iterator< + Item = <::ArcLockedSegment as LockedNSSegment>::EntryRef<'_>, + > + '_ { + self.locked_segments + .iter() + .flat_map(move |segment| segment.iter_entries()) + } + + pub fn segment_counts(&self) -> SegmentCounts { + SegmentCounts::new( + self.storage.max_segment_len(), + self.locked_segments.iter().map(|seg| seg.num_nodes()), + ) + } + + pub fn par_iter( + &self, + ) -> impl rayon::iter::ParallelIterator< + Item = <::ArcLockedSegment as LockedNSSegment>::EntryRef<'_>, + > + '_ { + self.locked_segments + .par_iter() + .flat_map(move |segment| segment.par_iter_entries()) + } + + pub fn row_groups_par_iter( + &self, + ) -> impl IndexedParallelIterator + '_)> { + let max_actual_seg_len = self + .locked_segments + .iter() + .map(|seg| seg.num_nodes()) + .max() + .unwrap_or(0); + row_group_par_iter( + self.storage.max_segment_len() as usize, + self.locked_segments.len(), + self.storage.max_segment_len(), + max_actual_seg_len, + ) + .map(|(s_id, iter)| (s_id, iter.filter(|vid| self.has_vid(*vid)))) + } + + fn has_vid(&self, vid: VID) -> bool { + let (segment_id, pos) = self.storage.resolve_pos(vid); + segment_id < self.locked_segments.len() + && pos.0 < self.locked_segments[segment_id].num_nodes() + } +} + +impl, EXT: PersistenceStrategy> + NodeStorageInner +{ + pub fn prop_meta(&self) -> &Arc { + &self.node_meta + } + + pub fn num_layers(&self) -> usize { + self.stats.len() + } + + pub fn num_nodes(&self) -> usize { + self.stats.get(LayerId(0)) + } + + // FIXME: this should be called by the high level APIs on layer filter + pub fn layer_num_nodes(&self, layer_id: usize) -> usize { + self.stats.get(LayerId(layer_id)) + } + + pub fn stats(&self) -> &Arc { + &self.stats + } + + pub fn segments_iter(&self) -> impl Iterator { + let count = self.segments.count(); + (0..count).map(|id| { + self.get_segment(id) + .expect("segment should exist given count") + }) + } + + pub fn num_segments(&self) -> usize { + self.segments.count() + } + + // pub fn segments(&self) -> &boxcar::Vec> { + // &self.segments + // } + + pub fn segments_par_iter(&self) -> impl ParallelIterator { + let len = self.segments.count(); + (0..len) + .into_par_iter() + .filter_map(|idx| self.segments.get(idx).map(|seg| seg.deref())) + } + + pub fn nodes_path(&self) -> Option<&Path> { + self.nodes_path.as_deref() + } + + /// Return the position of the chunk and the position within the chunk + pub fn resolve_pos(&self, i: impl Into) -> (usize, LocalPOS) { + resolve_pos(i.into(), 
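+            // e.g. with max_segment_len() == 4, index 5 resolves to (segment 1, LocalPOS(1))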
self.max_segment_len()) + } + + pub fn max_segment_len(&self) -> u32 { + self.ext.config().max_node_page_len() + } +} + +impl, EXT: PersistenceStrategy> + NodeStorageInner +{ + pub fn new_with_meta( + nodes_path: Option, + node_meta: Arc, + edge_meta: Arc, + ext: EXT, + ) -> Self { + let free_segments = (0..(*N)).map(RwLock::new).collect::>(); + let empty = Self { + segments: boxcar::Vec::new(), + stats: GraphStats::new().into(), + free_segments: free_segments.try_into().unwrap(), + nodes_path, + node_meta, + edge_meta, + ext, + }; + let layer_mapper = empty.node_meta.layer_meta(); + let prop_mapper = empty.node_meta.temporal_prop_mapper(); + let metadata_mapper = empty.node_meta.metadata_mapper(); + if layer_mapper.num_fields() > 0 + || prop_mapper.num_fields() > 0 + || metadata_mapper.num_fields() > 0 + { + let segment = empty.get_or_create_segment(0); + let mut head = segment.head_mut(); + if prop_mapper.num_fields() > 0 { + head.get_or_create_layer(LayerId(0)) + .properties_mut() + .set_has_properties() + } + segment.set_dirty(true); + } + empty + } + + pub fn locked(self: &Arc) -> ReadLockedNodeStorage { + let locked_segments = self + .segments_iter() + .map(|segment| segment.locked()) + .collect::>(); + ReadLockedNodeStorage { + storage: self.clone(), + locked_segments, + } + } + + pub fn write_locked<'a>(&'a self) -> WriteLockedNodePages<'a, NS> { + WriteLockedNodePages::new( + self.segments + .iter() + .map(|(page_id, page)| { + LockedNodePage::new( + page_id, + &self.stats, + self.max_segment_len(), + page.as_ref(), + page.head_mut(), + ) + }) + .collect(), + ) + } + + pub fn reserve_vid(&self, row: usize) -> VID { + let (seg, pos) = self.reserve_free_pos(row); + pos.as_vid(seg, self.max_segment_len()) + } + + pub fn reserve_free_pos(&self, row: usize) -> (usize, LocalPOS) { + let slot_idx = row % *N; + let maybe_free_page = { + let page_id = *self.free_segments[slot_idx].read_recursive(); + let page = self.segments.get(page_id); + + page.and_then(|page| { + self.reserve_segment_row(page) + .map(|pos| (page.segment_id(), LocalPOS(pos))) + }) + }; + + if let Some(reserved_pos) = maybe_free_page { + reserved_pos + } else { + // not lucky, go wait on your slot + let mut slot = self.free_segments[slot_idx].write(); + loop { + if let Some(page) = self.segments.get(*slot) + && let Some(pos) = self.reserve_segment_row(page) + { + return (page.segment_id(), LocalPOS(pos)); + } + *slot = self.push_new_segment(); + } + } + } + + /// Select a segment using `row` as a hint and reserves `num_rows` in that segment. + /// Returns the reserved position and a locked writer for that segment. + /// + /// # Deadlock Safety: do not hold any node segment locks when calling this function! 
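+    /// (This takes the free-slot lock and then the segment's write lock, so a
+    /// segment lock already held by the caller could invert that order.)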
+ pub fn reserve_and_lock_segment( + &self, + row: usize, + num_rows: u32, + ) -> ( + LocalPOS, + NodeWriter<'_, RwLockWriteGuard<'_, MemNodeSegment>, NS>, + ) { + let slot_idx = row % *N; + // No point in multiple threads getting past here as they would just content on the writer lock + let mut slot = self.free_segments[slot_idx].write(); + let mut segment_id = *slot; + + let writer = self.writer(segment_id); + match self.reserve_segment_rows(writer.page, num_rows) { + None => { + // The current segment is full, drop its lock and push a new free segment + drop(writer); + segment_id = self.push_new_segment(); + *slot = segment_id; + let writer = self.writer(segment_id); + let local_pos = self + .reserve_segment_rows(writer.page, num_rows) + .expect("new segment should never be full"); + (LocalPOS(local_pos), writer) + } + Some(local_pos) => (LocalPOS(local_pos), writer), + } + } + + /// Reserves a single row in the given segment and returns the position if successful. + /// Returns `None` if the segment is full. + pub fn reserve_segment_row(&self, segment: &NS) -> Option { + self.reserve_segment_rows(segment, 1) + } + + /// Reserves `rows` in the given segment and returns the position if successful. + /// Returns `None` if the segment is full. + fn reserve_segment_rows(&self, segment: &NS, rows: u32) -> Option { + increment_and_clamp(segment.nodes_counter(), rows, self.max_segment_len()) + } + + fn push_new_segment(&self) -> usize { + let segment_id = self.segments.push_with(|segment_id| { + Arc::new(NS::new( + segment_id, + self.node_meta.clone(), + self.edge_meta.clone(), + self.nodes_path.clone(), + self.ext.clone(), + )) + }); + + while self.segments.get(segment_id).is_none() { + std::thread::yield_now(); + } + + segment_id + } + + pub fn node<'a>(&'a self, node: impl Into) -> NS::Entry<'a> { + let (page_id, pos) = self.resolve_pos(node); + let node_page = self + .get_segment(page_id) + .expect("Internal error: page not found"); + node_page.entry(pos) + } + + pub fn try_node(&self, node: VID) -> Option> { + let (page_id, pos) = self.resolve_pos(node); + let node_page = self.segments.get(page_id)?; + if pos.0 < node_page.num_nodes() { + Some(node_page.entry(pos)) + } else { + None + } + } + + #[inline(always)] + pub fn writer<'a>( + &'a self, + segment_id: usize, + ) -> NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + let segment = self.get_or_create_segment(segment_id); + let head = segment.head_mut(); + NodeWriter::new(segment, &self.stats, head) + } + + pub fn try_writer<'a>( + &'a self, + segment_id: usize, + ) -> Option, NS>> { + let segment = self.get_or_create_segment(segment_id); + let head = segment.try_head_mut()?; + Some(NodeWriter::new(segment, &self.stats, head)) + } + + pub fn id_type(&self) -> Option { + self.node_meta + .metadata_mapper() + .d_types() + .first() + .and_then(GidType::from_prop_type) + } + + pub fn load( + nodes_path: impl AsRef, + edge_meta: Arc, + ext: EXT, + ) -> Result { + let nodes_path = nodes_path.as_ref(); + let max_page_len = ext.config().max_node_page_len(); + let node_meta = Arc::new(Meta::new_for_nodes()); + + if !nodes_path.exists() { + return Ok(Self::new_with_meta( + Some(nodes_path.to_path_buf()), + node_meta, + edge_meta, + ext.clone(), + )); + } + + let mut pages = std::fs::read_dir(nodes_path)? 
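+            // each segment lives in a subdirectory named by its numeric id;
+            // load all of them in parallel and index the result by that id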
+ .par_bridge() + .filter(|entry| { + entry + .as_ref() + .ok() + .and_then(|entry| entry.file_type().ok().map(|ft| ft.is_dir())) + .unwrap_or_default() + }) + .filter_map(|entry| { + let entry = entry.ok()?; + let page_id = entry + .path() + .file_stem() + .and_then(|name| name.to_str().and_then(|name| name.parse::().ok()))?; + let page = NS::load( + page_id, + node_meta.clone(), + edge_meta.clone(), + nodes_path, + ext.clone(), + ) + .map(|page| (page_id, page)); + Some(page) + }) + .collect::, _>>()?; + + if pages.is_empty() { + return Err(StorageError::EmptyGraphDir(nodes_path.to_path_buf())); + } + + let max_page = Iterator::max(pages.keys().copied()).unwrap(); + + let pages = (0..=max_page) + .map(|page_id| { + let np = pages.remove(&page_id).unwrap_or_else(|| { + NS::new( + page_id, + node_meta.clone(), + edge_meta.clone(), + Some(nodes_path.to_path_buf()), + ext.clone(), + ) + }); + Arc::new(np) + }) + .collect::>(); + + let first_page = pages.iter().next().unwrap().1; + let first_p_id = first_page.segment_id(); + + if first_p_id != 0 { + return Err(StorageError::GenericFailure(format!( + "First page id is not 0 in {nodes_path:?}" + ))); + } + + let mut layer_counts = vec![]; + + for (_, page) in pages.iter() { + for layer_id in 0..page.num_layers() { + let count = page.layer_count(LayerId(layer_id)) as usize; + if layer_counts.len() <= layer_id { + layer_counts.resize(layer_id + 1, 0); + } + layer_counts[layer_id] += count; + } + } + + let earliest = pages + .iter() + .filter_map(|(_, page)| page.earliest().filter(|t| t.t() != i64::MAX)) + .map(|t| t.t()) + .min() + .unwrap_or(i64::MAX); + + let latest = pages + .iter() + .filter_map(|(_, page)| page.latest().filter(|t| t.t() != i64::MIN)) + .map(|t| t.t()) + .max() + .unwrap_or(i64::MIN); + + let mut free_pages = pages + .iter() + .filter_map(|(_, page)| { + let len = page.num_nodes(); + if len < max_page_len { + Some(RwLock::new(page.segment_id())) + } else { + None + } + }) + .collect::>(); + + let mut next_free_page = free_pages + .last() + .map(|page| *(page.read())) + .map(|last| last + 1) + .unwrap_or_else(|| pages.count()); + + free_pages.resize_with(*N, || { + let lock = RwLock::new(next_free_page); + next_free_page += 1; + lock + }); + + let stats = GraphStats::load(layer_counts, earliest, latest); + + Ok(Self { + segments: pages, + free_segments: free_pages.try_into().unwrap(), + nodes_path: Some(nodes_path.to_path_buf()), + stats: stats.into(), + node_meta, + edge_meta, + ext, + }) + } + + pub fn get_edge(&self, src: VID, dst: VID, layer_id: LayerId) -> Option { + let (src_chunk, src_pos) = self.resolve_pos(src); + if src_chunk >= self.segments.count() { + return None; + } + let src_page = &self.segments[src_chunk]; + src_page.get_out_edge(src_pos, dst, layer_id, src_page.head()) + } + + pub fn grow(&self, new_len: usize) { + self.get_or_create_segment(new_len - 1); + } + + pub fn get_segment(&self, segment_id: usize) -> Option<&NS> { + self.segments + .get(segment_id) + .map(|seg| seg.deref()) + .or_else(|| { + let count = self.segments.count(); + if segment_id < count { + // Another thread has allocated the segment, wait for it to be added. + Some(self.wait_for_segment(segment_id).deref()) + } else { + None + } + }) + } + + fn wait_for_segment(&self, segment_id: usize) -> &Arc { + loop { + if let Some(segment) = self.segments.get(segment_id) { + return segment; + } else { + // Wait for the segment to be created. 
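+                // `boxcar::Vec` appears to publish the count before the entry
+                // becomes readable, hence the brief spin with `yield_now`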
+ std::thread::yield_now(); + } + } + } + + pub fn get_or_create_segment(&self, segment_id: usize) -> &Arc { + if let Some(segment) = self.segments.get(segment_id) { + return segment; + } + + let count = self.segments.count(); + + if segment_id < count { + // Another thread has allocated the segment, wait for it to be added. + self.wait_for_segment(segment_id) + } else { + // we need to create the segment. + self.segments.reserve(segment_id + 1 - count); + + loop { + // Create consecutive segments until the required segment is created. + let new_segment_id = self.segments.push_with(|segment_id| { + Arc::new(NS::new( + segment_id, + self.node_meta.clone(), + self.edge_meta.clone(), + self.nodes_path.clone(), + self.ext.clone(), + )) + }); + + // The segment has been created. + if segment_id <= new_segment_id { + return self.wait_for_segment(segment_id); + } + } + } + } + + pub(crate) fn segment_counts(&self) -> SegmentCounts { + SegmentCounts::new( + self.max_segment_len(), + self.segments_iter().map(|seg| seg.num_nodes()), + ) + } + + pub(crate) fn flush(&self) -> Result<(), StorageError> { + self.segments_par_iter().try_for_each(|seg| seg.flush()) + } +} + +/// Atomically increments `counter` and returns the previous value, but only if the result stays +/// within bounds. +/// If the result exceeds `limit`, leaves the counter unchanged and returns `None`. +pub fn increment_and_clamp(counter: &AtomicU32, increment: u32, limit: u32) -> Option { + counter + .fetch_update( + std::sync::atomic::Ordering::Relaxed, + std::sync::atomic::Ordering::Relaxed, + |current| { + let updated = current + increment; + if updated <= limit { + Some(updated) + } else { + None + } + }, + ) + .ok() +} diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs new file mode 100644 index 0000000000..6beed825ad --- /dev/null +++ b/db4-storage/src/pages/session.rs @@ -0,0 +1,207 @@ +use super::{ + GraphStore, edge_page::writer::EdgeWriter, node_page::writer::NodeWriters, resolve_pos, +}; +use crate::{ + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + persist::strategy::PersistenceStrategy, + segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + wal::LSN, +}; +use parking_lot::RwLockWriteGuard; +use raphtory_api::core::{ + entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + storage::dict_mapper::MaybeNew, +}; +use raphtory_core::{ + entities::{EID, ELID, VID}, + storage::timeindex::AsTime, +}; + +pub struct EdgeWriteSession< + 'a, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> { + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, + graph: &'a GraphStore, +} + +impl< + 'a, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, + EXT: PersistenceStrategy, +> EdgeWriteSession<'a, NS, ES, GS, EXT> +{ + pub fn new( + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, + graph: &'a GraphStore, + ) -> Self { + Self { + node_writers, + edge_writer, + graph, + } + } + + pub fn add_edge_into_layer( + &mut self, + t: T, + src: impl Into, + dst: impl Into, + edge: MaybeNew, + props: impl IntoIterator, + ) { + let src = src.into(); + let dst = dst.into(); + let e_id = edge.inner(); + let layer = e_id.layer(); + + // assert!(layer > 0, "Edge must be in a layer greater than 
0"); + + let (_, src_pos) = self.graph.nodes().resolve_pos(src); + let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); + + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + + self.edge_writer + .add_edge(t, edge_pos, src, dst, props, layer); + + let edge_id = edge.inner(); + + if edge.is_new() + || self + .node_writers + .get_mut_src() + .get_out_edge(src_pos, dst, edge_id.layer()) + .is_none() + { + self.node_writers + .get_mut_src() + .add_outbound_edge(Some(t), src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_inbound_edge(Some(t), dst_pos, src, edge_id); + } + + self.node_writers + .get_mut_src() + .update_timestamp(t, src_pos, e_id); + self.node_writers + .get_mut_dst() + .update_timestamp(t, dst_pos, e_id); + } + + pub fn delete_edge_from_layer( + &mut self, + t: T, + src: impl Into, + dst: impl Into, + edge: MaybeNew, + ) { + let src = src.into(); + let dst = dst.into(); + let e_id = edge.inner(); + let layer = e_id.layer(); + + // assert!(layer > 0, "Edge must be in a layer greater than 0"); + + let (_, src_pos) = self.graph.nodes().resolve_pos(src); + let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); + + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + + self.edge_writer.delete_edge(t, edge_pos, src, dst, layer); + + let edge_id = edge.inner(); + + if edge_id.layer() > STATIC_GRAPH_LAYER_ID { + if edge.is_new() + || self + .node_writers + .get_mut_src() + .get_out_edge(src_pos, dst, edge_id.layer()) + .is_none() + { + self.node_writers + .get_mut_src() + .add_outbound_edge(Some(t), src_pos, dst, edge_id); + + self.node_writers + .get_mut_dst() + .add_inbound_edge(Some(t), dst_pos, src, edge_id); + } + + self.node_writers + .get_mut_src() + .update_deletion_time(t, src_pos, e_id); + + self.node_writers + .get_mut_dst() + .update_deletion_time(t, dst_pos, e_id); + } + } + + pub fn add_static_edge(&mut self, src: impl Into, dst: impl Into) -> MaybeNew { + let src = src.into(); + let dst = dst.into(); + + let (_, src_pos) = self.graph.nodes().resolve_pos(src); + let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); + + let existing_eid = + self.node_writers + .get_mut_src() + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + + // Edge already exists, so no need to add it again. 
+ if let Some(eid) = existing_eid { + return MaybeNew::Existing(eid); + } + + let edge_pos = None; + let already_counted = false; + let edge_pos = self + .edge_writer + .add_static_edge(edge_pos, src, dst, already_counted); + let edge_id = edge_pos.as_eid( + self.edge_writer.segment_id(), + self.graph.edges().max_page_len(), + ); + + self.node_writers + .get_mut_src() + .add_static_outbound_edge(src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_static_inbound_edge(dst_pos, src, edge_id); + + MaybeNew::New(edge_id) + } + + pub fn node_writers( + &mut self, + ) -> &mut NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + &mut self.node_writers + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.node_writers.set_lsn(lsn); + self.edge_writer.set_lsn(lsn); + } +} diff --git a/db4-storage/src/pages/test_utils/fixtures.rs b/db4-storage/src/pages/test_utils/fixtures.rs new file mode 100644 index 0000000000..bd650cdf26 --- /dev/null +++ b/db4-storage/src/pages/test_utils/fixtures.rs @@ -0,0 +1,167 @@ +use super::props::{make_props, prop_type}; +use proptest::{collection, prelude::*}; +use raphtory_api::core::entities::{LayerId, properties::prop::Prop}; +use raphtory_core::entities::VID; +use std::{collections::HashMap, ops::Range}; + +pub type AddEdge = ( + VID, + VID, + i64, + Vec<(String, Prop)>, + Vec<(String, Prop)>, + Option<&'static str>, +); + +#[derive(Debug)] +pub struct NodeFixture { + pub temp_props: Vec<(VID, i64, Vec<(String, Prop)>)>, + pub const_props: Vec<(VID, Vec<(String, Prop)>)>, +} + +#[derive(Debug)] +pub struct Fixture { + pub edges: Vec, + pub const_props: HashMap<(VID, VID), Vec<(String, Prop)>>, +} + +impl From> for Fixture { + fn from(edges: Vec) -> Self { + let mut const_props = HashMap::new(); + for (src, dst, _, _, c_props, _) in &edges { + for (k, v) in c_props { + const_props + .entry((*src, *dst)) + .or_insert_with(|| vec![]) + .push((k.clone(), v.clone())); + } + } + const_props.iter_mut().for_each(|(_, v)| { + v.sort_by(|a, b| a.0.cmp(&b.0)); + v.dedup_by(|a, b| a.0 == b.0); + }); + Self { edges, const_props } + } +} + +pub fn make_edges(num_edges: usize, num_nodes: usize) -> impl Strategy { + assert!(num_edges > 0); + assert!(num_nodes > 0); + (1..=num_edges, 1..=num_nodes) + .prop_flat_map(|(len, num_nodes)| build_raw_edges(len, num_nodes)) + .prop_map(|edges| edges.into()) +} + +pub type PropsFixture = (Vec<(i64, Vec<(String, Prop)>)>, Vec<(String, Prop)>); + +pub fn make_props_strat(num_props: Range) -> impl Strategy { + let schema = proptest::collection::hash_map( + (0i32..10).prop_map(|i| i.to_string()), + prop_type(), + num_props.clone(), + ); + + schema.prop_flat_map(move |schema| { + let (t_props, c_props) = make_props(&schema); + let temp_props = proptest::collection::vec((0i64..1000, t_props), num_props.clone()); + + temp_props.prop_flat_map(move |temp_props| { + c_props + .clone() + .prop_map(move |const_props| (temp_props.clone(), const_props)) + }) + }) +} + +pub fn make_nodes(num_nodes: usize) -> impl Strategy { + assert!(num_nodes > 0); + let schema = + proptest::collection::hash_map((0i32..10).prop_map(|i| i.to_string()), prop_type(), 0..30); + + schema.prop_flat_map(move |schema| { + let (t_props, c_props) = make_props(&schema); + let temp_props = proptest::collection::vec( + ((0..num_nodes).prop_map(VID), 0i64..1000, t_props), + 1..=num_nodes, + ); + + let const_props = + proptest::collection::vec(((0..num_nodes).prop_map(VID), c_props), 1..=num_nodes); + + let const_props = const_props.prop_map(|mut nodes_with_const| 
{ + nodes_with_const.sort_by(|(vid, _), (vid2, _)| vid.cmp(vid2)); + nodes_with_const + .chunk_by(|(vid, _), (vid2, _)| *vid == *vid2) + .map(|stuff| { + let props = stuff + .iter() + .flat_map(|(_, values)| values.clone()) + .collect::>(); + let vid = stuff[0].0; + (vid, props.into_iter().collect::>()) + }) + .collect() + }); + + (temp_props, const_props).prop_map(|(temp_props, const_props)| NodeFixture { + temp_props, + const_props, + }) + }) +} + +pub fn edges_strat(size: usize) -> impl Strategy> { + (1..=size).prop_flat_map(|num_nodes| { + let num_edges = 0..(num_nodes * num_nodes); + let srcs = (0usize..num_nodes).prop_map(VID); + let dsts = (0usize..num_nodes).prop_map(VID); + num_edges.prop_flat_map(move |num_edges| { + collection::vec((srcs.clone(), dsts.clone()), num_edges) + }) + }) +} + +pub fn edges_strat_with_layers( + size: usize, +) -> impl Strategy)>> { + const MAX_LAYERS: usize = 16; + + (1..=size).prop_flat_map(|num_nodes| { + let num_edges = 0..(num_nodes * num_nodes); + let srcs = (0usize..num_nodes).prop_map(VID); + let dsts = (0usize..num_nodes).prop_map(VID); + let layer_ids = (1usize..MAX_LAYERS).prop_map(|id| Some(LayerId(id))); + + num_edges.prop_flat_map(move |num_edges| { + collection::vec((srcs.clone(), dsts.clone(), layer_ids.clone()), num_edges) + }) + }) +} + +pub type EdgeValues = ( + VID, + VID, + i64, + Vec<(String, Prop)>, + Vec<(String, Prop)>, + Option<&'static str>, +); + +pub fn build_raw_edges(len: usize, num_nodes: usize) -> impl Strategy> { + proptest::collection::hash_map((0i32..1000).prop_map(|i| i.to_string()), prop_type(), 0..20) + .prop_flat_map(move |schema| { + let (t_props, c_props) = make_props(&schema); + + proptest::collection::vec( + ( + (0..num_nodes).prop_map(VID), + (0..num_nodes).prop_map(VID), + 0i64..(num_nodes as i64 * 5), + t_props, + c_props, + proptest::sample::select(vec![Some("a"), Some("b"), None]), + ), + 1..=len, + ) + }) +} diff --git a/db4-storage/src/pages/test_utils/mod.rs b/db4-storage/src/pages/test_utils/mod.rs new file mode 100644 index 0000000000..e36d69e73b --- /dev/null +++ b/db4-storage/src/pages/test_utils/mod.rs @@ -0,0 +1,5 @@ +mod fixtures; +mod props; + +pub use fixtures::*; +pub use props::*; diff --git a/db4-storage/src/pages/test_utils/props.rs b/db4-storage/src/pages/test_utils/props.rs new file mode 100644 index 0000000000..51153167a8 --- /dev/null +++ b/db4-storage/src/pages/test_utils/props.rs @@ -0,0 +1,136 @@ +use bigdecimal::BigDecimal; +use chrono::{DateTime, NaiveDateTime, Utc}; +use itertools::Itertools; +use proptest::prelude::*; +use raphtory_api::core::entities::properties::prop::{DECIMAL_MAX, Prop, PropArray, PropType}; +use std::collections::HashMap; + +pub fn prop_type() -> impl Strategy { + let leaf = proptest::sample::select(&[ + PropType::Str, + PropType::I64, + PropType::F64, + PropType::F32, + PropType::I32, + PropType::U8, + PropType::Bool, + PropType::DTime, + PropType::NDTime, + PropType::Decimal { scale: 7 }, // decimal breaks the tests because of polars-parquet + ]); + + leaf.prop_recursive(3, 10, 10, |inner| { + let keys = (0..1_000_000).prop_map(|i| format!("k_{i}")); + let dict = + proptest::collection::hash_map(keys, inner.clone(), 1..10).prop_map(PropType::map); + let list = inner + .clone() + .prop_map(|p_type| PropType::List(Box::new(p_type))); + prop_oneof![inner, list, dict] + }) +} + +pub fn make_props( + schema: &HashMap, +) -> ( + BoxedStrategy>, + BoxedStrategy>, +) { + let mut iter = schema.iter(); + + // split in half, one temporal one constant + let t_prop_s = 
(&mut iter) + .take(schema.len() / 2) + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>(); + let c_prop_s = iter + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>(); + + let num_tprops = t_prop_s.len(); + let num_cprops = c_prop_s.len(); + + let t_props = proptest::sample::subsequence(t_prop_s, 0..=num_tprops).prop_flat_map(|schema| { + schema + .into_iter() + .map(|(k, v)| prop(&v).prop_map(move |prop| (k.clone(), prop))) + .collect::>() + }); + let c_props = proptest::sample::subsequence(c_prop_s, 0..=num_cprops).prop_flat_map(|schema| { + schema + .into_iter() + .map(|(k, v)| prop(&v).prop_map(move |prop| (k.clone(), prop))) + .collect::>() + }); + (t_props.boxed(), c_props.boxed()) +} + +pub(crate) fn prop(p_type: &PropType) -> impl Strategy + use<> { + match p_type { + PropType::Str => (0i32..1000).prop_map(|s| Prop::str(s.to_string())).boxed(), + PropType::I64 => any::().prop_map(Prop::I64).boxed(), + PropType::I32 => any::().prop_map(Prop::I32).boxed(), + PropType::F64 => any::().prop_map(Prop::F64).boxed(), + PropType::F32 => any::().prop_map(Prop::F32).boxed(), + PropType::U8 => any::().prop_map(Prop::U8).boxed(), + PropType::Bool => any::().prop_map(Prop::Bool).boxed(), + PropType::DTime => (1900..2024, 1..=12, 1..28, 0..24, 0..60, 0..60) + .prop_map(|(year, month, day, h, m, s)| { + Prop::DTime( + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + year, month, day, h, m, s + ) + .parse::>() + .unwrap(), + ) + }) + .boxed(), + PropType::NDTime => (1970..2024, 1..=12, 1..28, 0..24, 0..60, 0..60) + .prop_map(|(year, month, day, h, m, s)| { + // 2015-09-18T23:56:04 + Prop::NDTime( + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}", + year, month, day, h, m, s + ) + .parse::() + .unwrap(), + ) + }) + .boxed(), + // TODO: empty lists are a type nightmare + PropType::List(p_type) => proptest::collection::vec(prop(p_type), 1..10) + .prop_map(|props| Prop::List(PropArray::Vec(props.into()))) + .boxed(), + PropType::Map(p_types) => { + let prop_types: Vec> = p_types + .iter() + .map(|(a, b)| (a.clone(), b.clone())) + .collect::>() + .into_iter() + .map(|(name, p_type)| { + prop(&p_type) + .prop_map(move |prop| (name.clone(), prop.clone())) + .boxed() + }) + .collect_vec(); + + let props = proptest::sample::select(prop_types).prop_flat_map(|prop| prop); + + proptest::collection::vec(props, 1..10) + .prop_map(Prop::map) + .boxed() + } + PropType::Decimal { scale } => { + let scale = *scale; + let dec_max = DECIMAL_MAX; + ((scale as i128)..dec_max) + .prop_map(move |int| Prop::Decimal(BigDecimal::new(int.into(), scale))) + .boxed() + } + pt => { + panic!("Unsupported prop type: {:?}", pt); + } + } +} diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs new file mode 100644 index 0000000000..80435eaa16 --- /dev/null +++ b/db4-storage/src/persist/config.rs @@ -0,0 +1,167 @@ +use crate::error::StorageError; +use clap::{ + Args, Command, + error::{ContextKind, ContextValue}, +}; +use serde::{Deserialize, Serialize, de::DeserializeOwned}; +use std::{iter, path::Path}; +use tracing::error; + +pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 +pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 + +const CONFIG_FILE_NAME: &str = "config.json"; + +pub trait ConfigOps: Serialize + DeserializeOwned + Args + Sized { + fn max_node_page_len(&self) -> u32; + + fn max_edge_page_len(&self) -> u32; + + fn node_types(&self) -> &[String]; + + fn with_max_node_page_len(self, page_len: u32) -> Self; + + fn with_max_edge_page_len(self, page_len: u32) -> 
Self; + + fn with_node_types(&self, node_types: impl IntoIterator>) -> Self; + + fn load_from_dir(dir: &Path) -> Result { + let config_file = dir.join(CONFIG_FILE_NAME); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + fn save_to_dir(&self, dir: &Path) -> Result<(), StorageError> { + let config_file = dir.join(CONFIG_FILE_NAME); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } + + fn update(&mut self, new: Self); +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize, Args)] +#[serde(default)] +pub struct BaseConfig { + #[arg(long, default_value_t=DEFAULT_MAX_PAGE_LEN_NODES, env="RAPHTORY_MAX_NODE_PAGE_LEN")] + max_node_page_len: u32, + + #[arg(long, default_value_t=DEFAULT_MAX_PAGE_LEN_EDGES, env="RAPHTORY_MAX_EDGE_PAGE_LEN")] + max_edge_page_len: u32, +} + +pub trait ClapDefault: Args { + fn clap_default() -> Self; +} + +fn display_error(err: &clap::Error, cm: &Command) -> String { + if let Some(ContextValue::String(variable)) = err.get(ContextKind::InvalidArg) { + if let Some(ContextValue::String(value)) = err.get(ContextKind::InvalidValue) { + if let Some(arg) = cm.get_arguments().find(|arg| { + arg.get_long().is_some_and(|long| { + variable.starts_with(&format!("--{long}")) + || arg + .get_short() + .is_some_and(|short| variable.starts_with(&format!("-{short}"))) + }) + }) { + if let Some(env) = arg.get_env() { + let id = arg.get_id(); + let env = env.display(); + return format!("Invalid value from environment for '{id}': '{env}={value}'"); + } + } + } + } + err.to_string() +} + +impl ClapDefault for T { + fn clap_default() -> Self { + let cm = Self::augment_args(Command::default().no_binary_name(true)); + cm.clone() + .try_get_matches_from(iter::empty::()) + .and_then(|mut matches| Self::from_arg_matches_mut(&mut matches)) + .unwrap_or_else(|err| { + error!( + "{}, ignoring environment variables.", + display_error(&err, &cm) + ); + // unset environment variables and try again + cm.mut_args(|arg| arg.env(None)) + .try_get_matches_from(iter::empty::()) + .and_then(|mut matches| Self::from_arg_matches_mut(&mut matches)) + .expect("Reading defaults without environment variables should not fail.") + }) + } +} + +impl Default for BaseConfig { + fn default() -> Self { + Self::clap_default() + } +} + +impl BaseConfig { + pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { + Self { + max_node_page_len, + max_edge_page_len, + } + } +} + +impl ConfigOps for BaseConfig { + fn max_node_page_len(&self) -> u32 { + self.max_node_page_len + } + + fn max_edge_page_len(&self) -> u32 { + self.max_edge_page_len + } + + fn with_max_node_page_len(mut self, page_len: u32) -> Self { + self.max_node_page_len = page_len; + self + } + + fn with_max_edge_page_len(mut self, page_len: u32) -> Self { + self.max_edge_page_len = page_len; + self + } + + fn node_types(&self) -> &[String] { + &[] + } + + fn with_node_types(&self, _node_types: impl IntoIterator>) -> Self { + *self + } + + fn update(&mut self, _new: Self) { + // cannot update page lengths for an existing graph + } +} + +#[cfg(test)] +mod tests { + use crate::persist::config::{ + BaseConfig, DEFAULT_MAX_PAGE_LEN_EDGES, DEFAULT_MAX_PAGE_LEN_NODES, + }; + + #[test_log::test] + fn test_default() { + let default = BaseConfig::default(); + assert_eq!(default.max_edge_page_len, DEFAULT_MAX_PAGE_LEN_EDGES); + assert_eq!(default.max_node_page_len, DEFAULT_MAX_PAGE_LEN_NODES); + } + + #[test] 
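+    // `BaseConfig` is `#[serde(default)]`, so an empty JSON object should
+    // round-trip to the clap defaults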
+ fn test_deserialize() { + let default: BaseConfig = serde_json::from_str("{}").unwrap(); + assert_eq!(default.max_edge_page_len, DEFAULT_MAX_PAGE_LEN_EDGES); + assert_eq!(default.max_node_page_len, DEFAULT_MAX_PAGE_LEN_NODES); + } +} diff --git a/db4-storage/src/persist/control_file.rs b/db4-storage/src/persist/control_file.rs new file mode 100644 index 0000000000..9c8c942884 --- /dev/null +++ b/db4-storage/src/persist/control_file.rs @@ -0,0 +1,53 @@ +use crate::{error::StorageError, wal::LSN}; +use serde::{Deserialize, Serialize}; +use std::path::Path; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum DBState { + Running, + Shutdown, + CrashRecovery, + NotSupported, +} + +// Starting value for `last_checkpoint` in the control file. +pub const LAST_CHECKPOINT_INIT: LSN = 0; + +pub trait ControlFileOps: Sized { + fn load(dir: &Path) -> Result; + + fn save(&self) -> Result<(), StorageError>; + + fn db_state(&self) -> DBState; + + fn last_checkpoint(&self) -> LSN; + + fn set_db_state(&self, state: DBState); + + fn set_checkpoint(&self, lsn: LSN); +} + +#[derive(Debug, Clone)] +pub struct NoControlFile; + +impl ControlFileOps for NoControlFile { + fn load(_dir: &Path) -> Result { + Ok(NoControlFile) + } + + fn save(&self) -> Result<(), StorageError> { + Ok(()) + } + + fn db_state(&self) -> DBState { + DBState::NotSupported + } + + fn last_checkpoint(&self) -> LSN { + 0 + } + + fn set_db_state(&self, state: DBState) {} + + fn set_checkpoint(&self, lsn: LSN) {} +} diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs new file mode 100644 index 0000000000..7609d5b63e --- /dev/null +++ b/db4-storage/src/persist/mod.rs @@ -0,0 +1,3 @@ +pub mod config; +pub mod control_file; +pub mod strategy; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs new file mode 100644 index 0000000000..f733365aab --- /dev/null +++ b/db4-storage/src/persist/strategy.rs @@ -0,0 +1,173 @@ +use crate::{ + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + persist::{ + config::{BaseConfig, ConfigOps}, + control_file::{ControlFileOps, NoControlFile}, + }, + segments::{ + edge::segment::{EdgeSegmentView, MemEdgeSegment}, + graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, + node::segment::{MemNodeSegment, NodeSegmentView}, + }, + wal::{GraphWalOps, WalOps, no_wal::NoWal}, +}; +use std::{ + fmt::Debug, + ops::DerefMut, + path::Path, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, +}; + +pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { + type NS: NodeSegmentOps; + type ES: EdgeSegmentOps; + type GS: GraphPropSegmentOps; + type Wal: WalOps + GraphWalOps; + type Config: ConfigOps; + type ControlFile: ControlFileOps; + + fn new(config: Self::Config, graph_dir: Option<&Path>) -> Result; + + fn load(graph_dir: &Path) -> Result; + + fn load_with_config(graph_dir: &Path, config: Self::Config) -> Result; + + fn config(&self) -> &Self::Config; + + fn config_mut(&mut self) -> &mut Self::Config; + + fn wal(&self) -> &Self::Wal; + + fn control_file(&self) -> &Self::ControlFile; + + /// Called after every write and checks memory limits to decide if a flush is needed + fn persist_node_segment>( + &self, + node_segment: &Self::NS, + writer: MP, + ) where + Self: Sized; + + /// Called after every write and checks memory limits to decide if a flush is needed + fn persist_edge_segment>( + &self, + edge_segment: &Self::ES, + writer: MP, + ) where + Self: 
Sized; + + fn persist_graph_prop_segment>( + &self, + graph_prop_segment: &Self::GS, + writer: MP, + ) where + Self: Sized; + + /// Indicates whether the strategy persists to disk or not. + fn disk_storage_enabled() -> bool; + + /// Estimated global memory used + fn memory_tracker(&self) -> &Arc; + + fn estimated_size(&self) -> usize { + self.memory_tracker().load(Ordering::Relaxed) + } + + /// Called by bulk loaders to decide if a global flush should be triggered + fn should_flush(&self) -> bool; + fn should_pause(&self) -> bool; +} + +#[derive(Debug, Clone)] +pub struct NoOpStrategy { + config: BaseConfig, + memory_tracker: Arc, + wal: NoWal, + control_file: NoControlFile, +} + +impl PersistenceStrategy for NoOpStrategy { + type NS = NodeSegmentView; + type ES = EdgeSegmentView; + type GS = GraphPropSegmentView; + type Wal = NoWal; + type Config = BaseConfig; + type ControlFile = NoControlFile; + + fn new(config: BaseConfig, _graph_dir: Option<&Path>) -> Result { + Ok(Self { + config, + wal: NoWal, + control_file: NoControlFile, + memory_tracker: Arc::new(AtomicUsize::new(0)), + }) + } + + fn load(_graph_dir: &Path) -> Result { + Err(StorageError::DiskStorageNotSupported) + } + + fn load_with_config(_graph_dir: &Path, _config: Self::Config) -> Result { + Err(StorageError::DiskStorageNotSupported) + } + + fn config(&self) -> &Self::Config { + &self.config + } + + fn config_mut(&mut self) -> &mut Self::Config { + &mut self.config + } + + fn wal(&self) -> &Self::Wal { + &self.wal + } + + fn control_file(&self) -> &Self::ControlFile { + &self.control_file + } + + fn persist_node_segment>( + &self, + _node_page: &Self::NS, + _writer: MP, + ) { + // No operation + } + + fn persist_edge_segment>( + &self, + _edge_page: &Self::ES, + _writer: MP, + ) { + // No operation + } + + fn persist_graph_prop_segment>( + &self, + _graph_segment: &Self::GS, + _writer: MP, + ) { + // No operation + } + + fn disk_storage_enabled() -> bool { + false + } + + fn memory_tracker(&self) -> &Arc { + &self.memory_tracker + } + + fn should_flush(&self) -> bool { + false + } + + fn should_pause(&self) -> bool { + false + } +} diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs new file mode 100644 index 0000000000..39b361b3ae --- /dev/null +++ b/db4-storage/src/properties/mod.rs @@ -0,0 +1,383 @@ +use crate::error::StorageError; +use arrow_array::{ + ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array, Int64Array, + StringViewArray, TimestampMillisecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, +}; +use arrow_schema::DECIMAL128_MAX_PRECISION; +use bigdecimal::ToPrimitive; +use raphtory_api::core::entities::properties::{ + meta::PropMapper, + prop::{ + AsPropRef, Prop, PropRef, PropType, SerdeArrowList, SerdeArrowMap, + arrow_dtype_from_prop_type, list_array_from_props, struct_array_from_props, + }, +}; +use raphtory_core::{ + entities::{ + ELID, + properties::{props::MetadataError, tcell::TCell, tprop::TPropCell}, + }, + storage::{PropColumn, TColumns, timeindex::EventTime}, +}; +use std::sync::Arc; + +pub mod props_meta_writer; + +#[derive(Debug, Default)] +pub struct Properties { + c_properties: Vec, + + additions: Vec>, + deletions: Vec>, + times_from_props: Vec>>, + + t_properties: TColumns, + earliest: Option, + latest: Option, + has_additions: bool, + has_properties: bool, + has_deletions: bool, + pub additions_count: usize, +} + +pub(crate) struct PropMutEntry<'a> { + row: usize, + properties: &'a mut Properties, +} + +#[derive(Debug, 
Clone, Copy)] +pub struct PropEntry<'a> { + row: usize, + properties: &'a Properties, +} + +impl Properties { + pub fn est_size(&self) -> usize { + self.t_properties.len() + self.c_properties.len() + } + + pub(crate) fn get_mut_entry(&mut self, row: usize) -> PropMutEntry<'_> { + PropMutEntry { + row, + properties: self, + } + } + + pub(crate) fn get_entry(&self, row: usize) -> PropEntry<'_> { + PropEntry { + row, + properties: self, + } + } + + pub fn earliest(&self) -> Option { + self.earliest + } + + pub fn latest(&self) -> Option { + self.latest + } + + pub fn t_column(&self, prop_id: usize) -> Option<&PropColumn> { + self.t_properties.get(prop_id) + } + + pub fn t_column_mut(&mut self, prop_id: usize) -> Option<&mut PropColumn> { + self.t_properties.get_mut(prop_id) + } + + pub fn c_column(&self, prop_id: usize) -> Option<&PropColumn> { + self.c_properties.get(prop_id) + } + + pub fn num_t_columns(&self) -> usize { + self.t_properties.num_columns() + } + + pub fn num_c_columns(&self) -> usize { + self.c_properties.len() + } + + pub(crate) fn additions(&self, row: usize) -> Option<&TCell> { + self.additions.get(row) + } + + pub(crate) fn deletions(&self, row: usize) -> Option<&TCell> { + self.deletions.get(row) + } + + pub(crate) fn times_from_props(&self, row: usize) -> Option<&TCell>> { + self.times_from_props.get(row) + } + + pub fn has_properties(&self) -> bool { + self.has_properties + } + + pub fn set_has_properties(&mut self) { + self.has_properties = true + } + + pub fn has_additions(&self) -> bool { + self.has_additions + } + + pub fn has_deletions(&self) -> bool { + self.has_deletions + } + + pub(crate) fn column_as_array( + &self, + column: &PropColumn, + col_id: usize, + meta: &PropMapper, + indices: impl Iterator, + ) -> Option { + match column { + PropColumn::Empty(_) => None, + PropColumn::U32(lazy_vec) => Some(Arc::new(UInt32Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::Bool(lazy_vec) => Some(Arc::new(BooleanArray::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::U8(lazy_vec) => Some(Arc::new(UInt8Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::U16(lazy_vec) => Some(Arc::new(UInt16Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::U64(lazy_vec) => Some(Arc::new(UInt64Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::I32(lazy_vec) => Some(Arc::new(Int32Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::I64(lazy_vec) => Some(Arc::new(Int64Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::F32(lazy_vec) => Some(Arc::new(Float32Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::F64(lazy_vec) => Some(Arc::new(Float64Array::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied()), + ))), + PropColumn::Str(lazy_vec) => Some(Arc::new(StringViewArray::from_iter( + indices.map(|i| lazy_vec.get_opt(i)), + ))), + PropColumn::DTime(lazy_vec) => Some(Arc::new( + TimestampMillisecondArray::from_iter( + indices.map(|i| lazy_vec.get_opt(i).copied().map(|dt| dt.timestamp_millis())), + ) + .with_timezone("UTC"), + )), + PropColumn::NDTime(lazy_vec) => Some(Arc::new(TimestampMillisecondArray::from_iter( + indices.map(|i| { + lazy_vec + .get_opt(i) + .copied() + .map(|dt| dt.and_utc().timestamp_millis()) + }), + ))), + PropColumn::Decimal(lazy_vec) => { + let scale = meta + .get_dtype(col_id) + 
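+                    // the scale is carried in the column's `PropType::Decimal`
+                    // and should always be present for a decimal column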
.and_then(|dtype| match dtype { + PropType::Decimal { scale } => Some(scale as i8), + _ => None, + }) + .unwrap(); + Some(Arc::new( + Decimal128Array::from_iter(indices.map(|i| { + lazy_vec.get_opt(i).and_then(|bd| { + let (num, _) = bd.as_bigint_and_scale(); + num.to_i128() + }) + })) + .with_precision_and_scale(DECIMAL128_MAX_PRECISION, scale) + .unwrap(), + )) + } + PropColumn::Map(lazy_vec) => { + let dt = meta + .get_dtype(col_id) + .as_ref() + .map(arrow_dtype_from_prop_type)?; + let array_iter = indices + .map(|i| lazy_vec.get_opt(i)) + .map(|e| e.map(|m| SerdeArrowMap(m))); + + let struct_array = struct_array_from_props(&dt, array_iter).ok()?; + + Some(Arc::new(struct_array)) + } + PropColumn::List(lazy_vec) => { + let dt = meta + .get_dtype(col_id) + .as_ref() + .map(arrow_dtype_from_prop_type) + .unwrap(); + + let array_iter = indices + .map(|i| lazy_vec.get_opt(i)) + .map(|opt_list| opt_list.map(SerdeArrowList)); + + let list_array = list_array_from_props(&dt, array_iter).ok()?; + + Some(Arc::new(list_array)) + } + } + } + + pub fn take_t_column( + &self, + col_id: usize, + meta: &PropMapper, + indices: impl ExactSizeIterator, + ) -> Option { + let column = self.t_properties.get(col_id)?; + self.column_as_array(column, col_id, meta, indices) + } + + pub fn take_c_column( + &self, + col: usize, + meta: &PropMapper, + indices: impl Iterator, + ) -> Option { + let column = self.c_properties.get(col)?; + self.column_as_array(column, col, meta, indices) + } + + fn update_earliest_latest(&mut self, t: EventTime) { + self.additions_count += 1; + let earliest = self.earliest.get_or_insert(t); + if t < *earliest { + *earliest = t; + } + let latest = self.latest.get_or_insert(t); + if t > *latest { + *latest = t; + } + } + + pub fn t_len(&self) -> usize { + self.t_properties.len() + } +} + +impl<'a> PropMutEntry<'a> { + pub(crate) fn append_t_props( + &mut self, + t: EventTime, + props: impl IntoIterator, + ) { + let t_prop_row = if let Some(t_prop_row) = self + .properties + .t_properties + .push(props) + .expect("Internal error: properties should be validated at this point") + { + t_prop_row + } else { + self.properties.t_properties.push_null() + }; + + self.ensure_times_from_props(); + self.set_time(t, t_prop_row); + + self.properties.has_properties = true; + self.properties.update_earliest_latest(t); + } + + pub(crate) fn ensure_times_from_props(&mut self) { + if self.properties.times_from_props.len() <= self.row { + self.properties + .times_from_props + .resize_with(self.row + 1, Default::default); + } + } + + pub(crate) fn set_time(&mut self, t: EventTime, t_prop_row: usize) { + let prop_timestamps = &mut self.properties.times_from_props[self.row]; + prop_timestamps.set(t, Some(t_prop_row)); + } + + pub(crate) fn addition_timestamp(&mut self, t: EventTime, edge_id: ELID) { + if self.properties.additions.len() <= self.row { + self.properties + .additions + .resize_with(self.row + 1, Default::default); + } + + self.properties.has_additions = true; + let prop_timestamps = &mut self.properties.additions[self.row]; + prop_timestamps.set(t, edge_id); + + self.properties.update_earliest_latest(t); + } + + pub(crate) fn deletion_timestamp(&mut self, t: EventTime, edge_id: Option) { + if self.properties.deletions.len() <= self.row { + self.properties + .deletions + .resize_with(self.row + 1, Default::default); + } + + self.properties.has_deletions = true; + + let prop_timestamps = &mut self.properties.deletions[self.row]; + prop_timestamps.set(t, edge_id.unwrap_or_default()); + 
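+        // deletions advance the earliest/latest watermarks just like
+        // additions, so time-range queries account for deletions as well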
self.properties.update_earliest_latest(t); + } + + pub(crate) fn append_const_props( + &mut self, + props: impl IntoIterator, + ) { + for (prop_id, prop) in props { + if self.properties.c_properties.len() <= prop_id { + self.properties + .c_properties + .resize_with(prop_id + 1, Default::default); + } + let const_props = &mut self.properties.c_properties[prop_id]; + // property types should have been validated before! + const_props.upsert(self.row, prop.as_prop_ref()).unwrap(); + } + } +} + +impl<'a> PropEntry<'a> { + pub(crate) fn prop(self, prop_id: usize) -> Option> { + let t_cell = self.t_cell(); + Some(TPropCell::new(t_cell, self.properties.t_column(prop_id))) + } + + pub fn metadata(self, prop_id: usize) -> Option { + self.properties.c_column(prop_id)?.get(self.row) + } + + pub fn check_metadata(self, prop_id: usize, new_val: PropRef<'_>) -> Result<(), StorageError> { + if let Some(col) = self.properties.c_column(prop_id) { + col.check(self.row, &new_val) + .map_err(Into::::into)?; + } + + Ok(()) + } + + pub fn t_cell(self) -> &'a TCell> { + self.properties + .times_from_props(self.row) + .unwrap_or(&TCell::Empty) + } + + pub fn additions(self) -> &'a TCell { + self.properties.additions(self.row).unwrap_or(&TCell::Empty) + } + + pub fn deletions(self) -> &'a TCell { + self.properties.deletions(self.row).unwrap_or(&TCell::Empty) + } +} diff --git a/db4-storage/src/properties/props_meta_writer.rs b/db4-storage/src/properties/props_meta_writer.rs new file mode 100644 index 0000000000..2d0c23e901 --- /dev/null +++ b/db4-storage/src/properties/props_meta_writer.rs @@ -0,0 +1,311 @@ +use either::Either; +use raphtory_api::core::{ + entities::properties::{ + meta::{LockedPropMapper, Meta, PropMapper}, + prop::{Prop, unify_types}, + }, + storage::dict_mapper::MaybeNew, +}; + +use crate::error::StorageError; + +// TODO: Rename constant props to metadata +#[derive(Debug, Clone, Copy)] +pub enum PropType { + Temporal, + Constant, +} + +pub enum PropsMetaWriter<'a, PN: AsRef> { + Change { + props: Vec>, + mapper: LockedPropMapper<'a>, + meta: &'a Meta, + }, + NoChange { + props: Vec<(PN, usize, Prop)>, + }, +} + +pub enum PropEntry<'a, PN: AsRef + 'a> { + Change { + name: PN, + prop_id: Option, + prop: Prop, + _phantom: &'a (), + }, + NoChange(PN, usize, Prop), +} + +impl<'a, PN: AsRef> PropsMetaWriter<'a, PN> { + pub fn temporal( + meta: &'a Meta, + props: impl Iterator, + ) -> Result { + Self::new(meta, meta.temporal_prop_mapper(), props) + } + + pub fn constant( + meta: &'a Meta, + props: impl Iterator, + ) -> Result { + Self::new(meta, meta.metadata_mapper(), props) + } + + pub fn new( + meta: &'a Meta, + prop_mapper: &'a PropMapper, + props: impl Iterator, + ) -> Result { + let locked_meta = prop_mapper.locked(); + + let mut in_props = props + .size_hint() + .1 + .map(Vec::with_capacity) + .unwrap_or_default(); + + let mut no_type_changes = true; + + // See if any type unification is required while merging props + for (prop_name, prop) in props { + let dtype = prop.dtype(); + let outcome @ (_, _, type_check) = locked_meta + .fast_proptype_check(prop_name.as_ref(), dtype) + .map(|outcome| (prop_name, prop, outcome))?; + let nothing_to_do = type_check.map(|x| x.is_right()).unwrap_or_default(); + + no_type_changes &= nothing_to_do; + in_props.push(outcome); + } + + // If no type changes are required, we can just return the existing prop ids + if no_type_changes { + let props = in_props + .into_iter() + .filter_map(|(prop_name, prop, _)| { + locked_meta + .get_id(prop_name.as_ref()) + .map(|id| 
(prop_name, id, prop)) + }) + .collect(); + + return Ok(Self::NoChange { props }); + } + + let mut props = vec![]; + + for (prop_name, prop, outcome) in in_props { + props.push(Self::as_prop_entry(prop_name, prop, outcome)); + } + + Ok(Self::Change { + props, + mapper: locked_meta, + meta, + }) + } + + fn as_prop_entry( + prop_name: PN, + prop: Prop, + outcome: Option>, + ) -> PropEntry<'a, PN> { + match outcome { + Some(Either::Right(prop_id)) => PropEntry::NoChange(prop_name, prop_id, prop), + Some(Either::Left(prop_id)) => PropEntry::Change { + name: prop_name, + prop_id: Some(prop_id), + prop, + _phantom: &(), + }, + None => { + // prop id doesn't exist so we grab the entry + PropEntry::Change { + name: prop_name, + prop_id: None, + prop, + _phantom: &(), + } + } + } + } + + pub fn into_props_temporal(self) -> Result, StorageError> { + self.into_props_inner(PropType::Temporal) + } + + /// Returns temporal prop names, prop ids and prop values, along with their MaybeNew status. + pub fn into_props_temporal_with_status( + self, + ) -> Result>, StorageError> { + self.into_props_inner_with_status(PropType::Temporal) + } + + pub fn into_props_const(self) -> Result, StorageError> { + self.into_props_inner(PropType::Constant) + } + + /// Returns constant prop names, prop ids and prop values, along with their MaybeNew status. + pub fn into_props_const_with_status( + self, + ) -> Result>, StorageError> { + self.into_props_inner_with_status(PropType::Constant) + } + + pub fn into_props_inner(self, prop_type: PropType) -> Result, StorageError> { + self.into_props_inner_with_status(prop_type).map(|props| { + props + .into_iter() + .map(|maybe_new| { + let (_, prop_id, prop) = maybe_new.inner(); + (prop_id, prop) + }) + .collect() + }) + } + + pub fn into_props_inner_with_status( + self, + prop_type: PropType, + ) -> Result>, StorageError> { + match self { + Self::NoChange { props } => Ok(props + .into_iter() + .map(|(prop_name, prop_id, prop)| MaybeNew::Existing((prop_name, prop_id, prop))) + .collect()), + Self::Change { + props, + mapper, + meta, + } => { + let mut prop_with_ids = vec![]; + + drop(mapper); + + let mut mapper = match prop_type { + PropType::Temporal => meta.temporal_prop_mapper().write_locked(), + PropType::Constant => meta.metadata_mapper().write_locked(), + }; + + // Revalidate prop types + let props = props + .into_iter() + .map(|entry| match entry { + PropEntry::NoChange(name, _, prop) => { + let new_entry = mapper + .fast_proptype_check(name.as_ref(), prop.dtype()) + .map(|outcome| Self::as_prop_entry(name, prop, outcome))?; + + Ok(new_entry) + } + PropEntry::Change { name, prop, .. } => { + let new_entry = mapper + .fast_proptype_check(name.as_ref(), prop.dtype()) + .map(|outcome| Self::as_prop_entry(name, prop, outcome))?; + + Ok(new_entry) + } + }) + .collect::, StorageError>>()?; + + for entry in props { + match entry { + PropEntry::NoChange(name, prop_id, prop) => { + prop_with_ids.push(MaybeNew::Existing((name, prop_id, prop))); + } + PropEntry::Change { + name, + prop_id: Some(prop_id), + prop, + .. + } => { + // prop_id already exists, so we need to unify the types + let new_prop_type = prop.dtype(); + let existing_type = mapper.get_dtype(prop_id).unwrap(); + let new_prop_type = + unify_types(&new_prop_type, existing_type, &mut false)?; + + mapper.set_id_and_dtype(name.as_ref(), prop_id, new_prop_type); + prop_with_ids.push(MaybeNew::Existing((name, prop_id, prop))); + } + PropEntry::Change { name, prop, .. 
} => { + // prop_id doesn't exist, so we need to create a new one + let new_prop_type = prop.dtype(); + let prop_id = mapper.new_id_and_dtype(name.as_ref(), new_prop_type); + + prop_with_ids.push(MaybeNew::New((name, prop_id, prop))); + } + } + } + + Ok(prop_with_ids) + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use raphtory_api::core::storage::arc_str::ArcStr; + + #[test] + fn test_props_meta_writer() { + let meta = Meta::default(); + let props = vec![ + (ArcStr::from("prop1"), Prop::U32(0)), + (ArcStr::from("prop2"), Prop::U32(1)), + ]; + let writer = PropsMetaWriter::temporal(&meta, props.into_iter()).unwrap(); + let props = writer.into_props_temporal().unwrap(); + assert_eq!(props.len(), 2); + + assert_eq!(props, vec![(0, Prop::U32(0)), (1, Prop::U32(1))]); + + assert_eq!(meta.temporal_prop_mapper().keys().len(), 2); + } + + #[test] + fn complex_props_meta_writer() { + let meta = Meta::default(); + let prop_list_map = Prop::list([Prop::map([("a", 1)]), Prop::map([("b", 2f64)])]); + let props = vec![("a", prop_list_map.clone())]; + + let writer = PropsMetaWriter::temporal(&meta, props.into_iter()).unwrap(); + let props = writer.into_props_temporal().unwrap(); + assert_eq!(props.len(), 1); + + assert_eq!(props, vec![(0, prop_list_map.clone())]); + + let expected_d_type = prop_list_map.dtype(); + + assert_eq!( + meta.temporal_prop_mapper().d_types().first().unwrap(), + &expected_d_type + ); + } + + #[test] + fn test_fail_typecheck() { + let meta = Meta::default(); + let prop1 = Prop::U32(0); + let prop2 = Prop::U64(1); + + let writer = + PropsMetaWriter::temporal(&meta, vec![(ArcStr::from("prop1"), prop1)].into_iter()) + .unwrap(); + let props = writer.into_props_temporal().unwrap(); + assert_eq!(props.len(), 1); + + assert_eq!(meta.temporal_prop_mapper().keys().len(), 1); + assert!(meta.temporal_prop_mapper().get_id("prop1").is_some()); + + let writer = + PropsMetaWriter::temporal(&meta, vec![(ArcStr::from("prop1"), prop2)].into_iter()); + + assert!(writer.is_err()); + assert_eq!(meta.temporal_prop_mapper().keys().len(), 1); + assert!(meta.temporal_prop_mapper().get_id("prop1").is_some()); + } +} diff --git a/db4-storage/src/resolver/mapping_resolver.rs b/db4-storage/src/resolver/mapping_resolver.rs new file mode 100644 index 0000000000..d117f0404f --- /dev/null +++ b/db4-storage/src/resolver/mapping_resolver.rs @@ -0,0 +1,305 @@ +use crate::resolver::{GIDResolverOps, Initialiser, MaybeInit, StorageError}; +use dashmap::{VacantEntry, mapref::entry::Entry}; +use lock_api::ArcMutexGuard; +use once_cell::sync::OnceCell; +use parking_lot::{Mutex, RawMutex}; +use raphtory_api::core::{ + entities::{GID, GidRef, GidType, VID}, + storage::FxDashMap, +}; +use std::{ + borrow::Borrow, + hash::Hash, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, +}; +use thiserror::Error; + +use std::path::Path; + +#[derive(Debug)] +enum Map { + U64(FxDashMap), + Str(FxDashMap), +} + +#[derive(Debug, Copy, Clone)] +enum MaybeVID { + VID(VID), + Init(usize), +} + +impl MaybeVID { + fn value(self) -> Option { + match self { + MaybeVID::VID(vid) => Some(vid), + MaybeVID::Init(_) => None, + } + } +} + +enum InitGuard { + Init { + init_id: usize, + guard: ArcMutexGuard, + }, + Read(Arc>), +} + +#[derive(Error, Debug)] +pub enum InvalidNodeId { + #[error("Node id {0} does not have the correct type, expected String")] + InvalidNodeIdU64(u64), + #[error("Node id {0} does not have the correct type, expected Numeric")] + InvalidNodeIdStr(String), +} + +impl Map { + fn as_u64(&self) -> 
Option<&FxDashMap> { + match self { + Map::U64(map) => Some(map), + _ => None, + } + } + + fn as_str(&self) -> Option<&FxDashMap> { + match self { + Map::Str(map) => Some(map), + _ => None, + } + } +} + +impl Default for Map { + fn default() -> Self { + Map::U64(FxDashMap::default()) + } +} + +#[derive(Debug, Default)] +pub struct MappingResolver { + map: OnceCell, + uninitialised: FxDashMap>>, + init_counter: AtomicUsize, +} + +pub struct Init<'a> { + mapping: &'a MappingResolver, + init_id: usize, + gid: GID, + guard: ArcMutexGuard, +} + +impl<'a> Initialiser for Init<'a> { + fn init(mut self, vid: VID) -> Result<(), StorageError> { + *self.guard = vid; + self.mapping + .set(self.gid.as_ref(), vid) + .expect("gid should have been validated"); + self.mapping.uninitialised.remove(&self.init_id); + Ok(()) + } +} + +impl MappingResolver { + pub fn new_u64() -> Self { + MappingResolver { + map: OnceCell::with_value(Map::U64(Default::default())), + uninitialised: Default::default(), + init_counter: Default::default(), + } + } + + pub fn new_str() -> Self { + MappingResolver { + map: OnceCell::with_value(Map::Str(Default::default())), + uninitialised: Default::default(), + init_counter: Default::default(), + } + } + + fn push_uninit(&self, entry: VacantEntry) -> InitGuard { + let lock = Arc::new(Mutex::new(VID::default())); + let guard = lock.lock_arc(); + let init_id = self.init_counter.fetch_add(1, Ordering::Relaxed); + self.uninitialised.insert(init_id, lock); + entry.insert(MaybeVID::Init(init_id)); + InitGuard::Init { init_id, guard } + } + + fn get_uninit(&self, init_id: &usize) -> Arc> { + self.uninitialised + .get(init_id) + .expect("initialisation guard should exist") + .clone() + } + + fn get_value_from_map(&self, map: &FxDashMap, key: &Q) -> Option + where + K: Borrow + Eq + Hash, + Q: Hash + Eq + ?Sized, + { + map.get(key)?.value().value() + } + + fn handle_init_guard(&self, init_guard: InitGuard, gid: GidRef) -> MaybeInit> { + match init_guard { + InitGuard::Init { guard, init_id } => MaybeInit::Init(Init { + mapping: self, + init_id, + gid: gid.to_owned(), + guard, + }), + InitGuard::Read(guard) => MaybeInit::VID(*guard.lock()), + } + } +} + +impl GIDResolverOps for MappingResolver { + type Init<'a> = Init<'a>; + + fn new() -> Result + where + Self: Sized, + { + Ok(MappingResolver { + map: OnceCell::new(), + uninitialised: Default::default(), + init_counter: Default::default(), + }) + } + + fn new_with_path( + _path: impl AsRef, + dtype: Option, + ) -> Result { + match dtype { + None => Self::new(), + Some(dtype) => { + let mapping = match dtype { + GidType::U64 => MappingResolver::new_u64(), + GidType::Str => MappingResolver::new_str(), + }; + Ok(mapping) + } + } + } + + fn len(&self) -> usize { + self.map.get().map_or(0, |map| match map { + Map::U64(map) => map.len(), + Map::Str(map) => map.len(), + }) + } + + fn dtype(&self) -> Option { + self.map.get().map(|map| match map { + Map::U64(_) => GidType::U64, + Map::Str(_) => GidType::Str, + }) + } + + fn set(&self, gid: GidRef, vid: VID) -> Result<(), StorageError> { + let map = self.map.get_or_init(|| match gid { + GidRef::U64(_) => Map::U64(FxDashMap::default()), + GidRef::Str(_) => Map::Str(FxDashMap::default()), + }); + match gid { + GidRef::U64(id) => { + map.as_u64() + .ok_or(InvalidNodeId::InvalidNodeIdU64(id))? 
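+                    // Overwrites any MaybeVID::Init placeholder left by get_or_init;
+                    // Init::init relies on this to publish the final VID.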
+ .insert(id, MaybeVID::VID(vid)); + } + GidRef::Str(id) => { + let id = id.to_owned(); + match map.as_str() { + None => Err(InvalidNodeId::InvalidNodeIdStr(id))?, + Some(map) => { + map.insert(id, MaybeVID::VID(vid)); + } + } + } + } + Ok(()) + } + + fn get_or_init(&self, gid: GidRef) -> Result>, StorageError> { + let map = self.map.get_or_init(|| match &gid { + GidRef::U64(_) => Map::U64(FxDashMap::default()), + GidRef::Str(_) => Map::Str(FxDashMap::default()), + }); + let vid_init = match gid { + GidRef::U64(key) => { + let map = map.as_u64().ok_or(InvalidNodeId::InvalidNodeIdU64(key))?; + let init_guard = match map.entry(key) { + Entry::Occupied(id) => match id.get() { + MaybeVID::VID(vid) => return Ok(MaybeInit::VID(*vid)), + MaybeVID::Init(init_id) => InitGuard::Read(self.get_uninit(init_id)), + }, + Entry::Vacant(entry) => self.push_uninit(entry), + }; + self.handle_init_guard(init_guard, gid) + } + GidRef::Str(key) => { + let map = map + .as_str() + .ok_or_else(|| InvalidNodeId::InvalidNodeIdStr(key.into()))?; + + let init_guard = match map.get(key) { + None => match map.entry(key.to_owned()) { + Entry::Occupied(entry) => match entry.get() { + MaybeVID::VID(vid) => return Ok(MaybeInit::VID(*vid)), + MaybeVID::Init(init_id) => InitGuard::Read(self.get_uninit(init_id)), + }, + Entry::Vacant(entry) => self.push_uninit(entry), + }, + Some(maybe_vid) => match maybe_vid.value() { + MaybeVID::VID(vid) => return Ok(MaybeInit::VID(*vid)), + MaybeVID::Init(init_id) => InitGuard::Read(self.get_uninit(init_id)), + }, + }; + self.handle_init_guard(init_guard, gid) + } + }; + Ok(vid_init) + } + + fn validate_gids<'a>( + &self, + gids: impl IntoIterator>, + ) -> Result<(), StorageError> { + for gid in gids { + let map = self.map.get_or_init(|| match &gid { + GidRef::U64(_) => Map::U64(FxDashMap::default()), + GidRef::Str(_) => Map::Str(FxDashMap::default()), + }); + match gid { + GidRef::U64(id) => { + map.as_u64().ok_or(InvalidNodeId::InvalidNodeIdU64(id))?; + } + GidRef::Str(id) => { + map.as_str() + .ok_or_else(|| InvalidNodeId::InvalidNodeIdStr(id.into()))?; + } + } + } + + Ok(()) + } + + fn get_str(&self, gid: &str) -> Option { + let map = self.map.get()?; + map.as_str().and_then(|m| self.get_value_from_map(m, gid)) + } + + fn get_u64(&self, gid: u64) -> Option { + let map = self.map.get()?; + map.as_u64().and_then(|m| self.get_value_from_map(m, &gid)) + } + + fn flush(&self) -> Result<(), StorageError> { + Ok(()) + } +} diff --git a/db4-storage/src/resolver/mod.rs b/db4-storage/src/resolver/mod.rs new file mode 100644 index 0000000000..32897531ee --- /dev/null +++ b/db4-storage/src/resolver/mod.rs @@ -0,0 +1,78 @@ +use crate::error::StorageError; +use raphtory_api::core::entities::{GidRef, GidType, VID}; +use std::path::Path; + +pub mod mapping_resolver; + +/// Either an initialiser or a `VID`. For equality checks, only VIDs are compared, initialisers are +/// never considered equal. 
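+/// A minimal sketch of the equality rule (crate paths are assumed here, hence
+/// `ignore`): resolved ids compare by `VID`, while initialisers never compare
+/// equal, not even to themselves.
+///
+/// ```ignore
+/// use db4_storage::resolver::MaybeInit;
+/// use raphtory_api::core::entities::VID;
+///
+/// let a: MaybeInit<()> = MaybeInit::VID(VID(1));
+/// let b: MaybeInit<()> = MaybeInit::VID(VID(1));
+/// assert!(a == b); // resolved ids compare by VID
+/// assert!(MaybeInit::<()>::Init(()) != MaybeInit::Init(())); // initialisers are never equal
+/// assert!(!a.needs_init());
+/// ```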
+pub enum MaybeInit { + VID(VID), + Init(I), +} + +impl MaybeInit { + pub fn needs_init(&self) -> bool { + matches!(self, MaybeInit::Init(_)) + } +} + +impl PartialEq for MaybeInit { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (MaybeInit::VID(left), MaybeInit::VID(right)) => left == right, + _ => false, + } + } +} + +pub trait Initialiser { + fn init(self, vid: VID) -> Result<(), StorageError>; +} + +pub trait GIDResolverOps { + type Init<'a>: Initialiser + where + Self: 'a; + + fn new() -> Result + where + Self: Sized; + + fn new_with_path(path: impl AsRef, dtype: Option) -> Result + where + Self: Sized; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn dtype(&self) -> Option; + + fn set(&self, gid: GidRef, vid: VID) -> Result<(), StorageError>; + + fn get_or_init<'a>( + &'a self, + gid: GidRef<'a>, + ) -> Result>, StorageError>; + + fn validate_gids<'a>( + &self, + gids: impl IntoIterator>, + ) -> Result<(), StorageError>; + + fn get_str(&self, gid: &str) -> Option; + + fn get_u64(&self, gid: u64) -> Option; + + fn get(&self, gid: GidRef) -> Option { + match gid { + GidRef::Str(s) => self.get_str(s), + GidRef::U64(u) => self.get_u64(u), + } + } + + fn flush(&self) -> Result<(), StorageError>; +} diff --git a/db4-storage/src/segments/additions.rs b/db4-storage/src/segments/additions.rs new file mode 100644 index 0000000000..6b058e0429 --- /dev/null +++ b/db4-storage/src/segments/additions.rs @@ -0,0 +1,121 @@ +use std::ops::Range; + +use raphtory_api_macros::box_on_debug_lifetime; +use raphtory_core::{ + entities::{ELID, properties::tcell::TCell}, + storage::timeindex::{EventTime, TimeIndexOps, TimeIndexWindow}, +}; + +use crate::{gen_ts::EdgeEventOps, utils::Iter4}; + +#[derive(Clone, Debug)] +pub enum MemAdditions<'a> { + Edges(&'a TCell), + Props(&'a TCell>), + WEdges(TimeIndexWindow<'a, EventTime, TCell>), + WProps(TimeIndexWindow<'a, EventTime, TCell>>), +} + +impl<'a> From<&'a TCell> for MemAdditions<'a> { + fn from(edges: &'a TCell) -> Self { + MemAdditions::Edges(edges) + } +} + +impl<'a> From<&'a TCell>> for MemAdditions<'a> { + fn from(props: &'a TCell>) -> Self { + MemAdditions::Props(props) + } +} + +impl<'a> EdgeEventOps<'a> for MemAdditions<'a> { + #[box_on_debug_lifetime] + fn edge_events(self) -> impl Iterator + Send + Sync + 'a { + match self { + MemAdditions::Edges(edges) => Iter4::I(edges.iter().map(|(k, v)| (*k, *v))), + MemAdditions::WEdges(TimeIndexWindow::All(ti)) => { + Iter4::J(ti.iter().map(|(k, v)| (*k, *v))) + } + MemAdditions::WEdges(TimeIndexWindow::Range { timeindex, range }) => { + Iter4::K(timeindex.iter_window(range).map(|(k, v)| (*k, *v))) + } + _ => Iter4::L(std::iter::empty()), + } + } + + #[box_on_debug_lifetime] + fn edge_events_rev(self) -> impl Iterator + Send + Sync + 'a { + match self { + MemAdditions::Edges(edges) => Iter4::I(edges.iter().map(|(k, v)| (*k, *v)).rev()), + MemAdditions::WEdges(TimeIndexWindow::All(ti)) => { + Iter4::J(ti.iter().map(|(k, v)| (*k, *v)).rev()) + } + MemAdditions::WEdges(TimeIndexWindow::Range { timeindex, range }) => { + Iter4::K(timeindex.iter_window(range).map(|(k, v)| (*k, *v)).rev()) + } + _ => Iter4::L(std::iter::empty()), + } + } +} + +impl<'a> TimeIndexOps<'a> for MemAdditions<'a> { + type IndexType = EventTime; + + type RangeType = Self; + + fn active(&self, w: Range) -> bool { + match self { + MemAdditions::Props(props) => props.active(w), + MemAdditions::Edges(edges) => edges.active(w), + MemAdditions::WProps(window) => window.active(w), + 
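+            // The windowed variants delegate to the wrapped TimeIndexWindow.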
MemAdditions::WEdges(window) => window.active(w), + } + } + + fn range(&self, w: Range) -> Self::RangeType { + match self { + MemAdditions::Props(props) => MemAdditions::WProps(props.range(w)), + MemAdditions::Edges(edges) => MemAdditions::WEdges(edges.range(w)), + MemAdditions::WProps(window) => MemAdditions::WProps(window.range(w)), + MemAdditions::WEdges(window) => MemAdditions::WEdges(window.range(w)), + } + } + + #[box_on_debug_lifetime] + fn iter(self) -> impl Iterator + Send + Sync + 'a { + match self { + MemAdditions::Props(props) => Iter4::I(props.iter().map(|(k, _)| *k)), + MemAdditions::Edges(edges) => Iter4::J(edges.iter().map(|(k, _)| *k)), + MemAdditions::WProps(window) => Iter4::K(window.iter()), + MemAdditions::WEdges(window) => Iter4::L(window.iter()), + } + } + + #[box_on_debug_lifetime] + fn iter_rev(self) -> impl Iterator + Send + Sync + 'a { + match self { + MemAdditions::Props(props) => Iter4::I(props.iter_rev()), + MemAdditions::Edges(edges) => Iter4::J(edges.iter_rev()), + MemAdditions::WProps(window) => Iter4::K(window.iter_rev()), + MemAdditions::WEdges(window) => Iter4::L(window.iter_rev()), + } + } + + fn len(&self) -> usize { + match self { + MemAdditions::Props(props) => props.len(), + MemAdditions::Edges(edges) => edges.len(), + MemAdditions::WProps(window) => window.len(), + MemAdditions::WEdges(window) => window.len(), + } + } + + fn is_empty(&self) -> bool { + match self { + MemAdditions::Edges(edges) => edges.is_empty(), + MemAdditions::Props(props) => props.is_empty(), + MemAdditions::WEdges(edges) => edges.is_empty(), + MemAdditions::WProps(edges) => edges.is_empty(), + } + } +} diff --git a/db4-storage/src/segments/edge/entry.rs b/db4-storage/src/segments/edge/entry.rs new file mode 100644 index 0000000000..d2a506280d --- /dev/null +++ b/db4-storage/src/segments/edge/entry.rs @@ -0,0 +1,194 @@ +use crate::{ + EdgeAdditions, EdgeDeletions, EdgeTProps, LocalPOS, + api::edges::{EdgeEntryOps, EdgeRefOps}, + gen_ts::{AdditionCellsRef, DeletionCellsRef, WithTimeCells}, + generic_t_props::WithTProps, + segments::{additions::MemAdditions, edge::segment::MemEdgeSegment}, +}; +use raphtory_api::core::entities::{LayerId, properties::prop::Prop}; +use raphtory_core::{ + entities::{ + EID, Multiple, VID, + properties::{tcell::TCell, tprop::TPropCell}, + }, + storage::timeindex::{EventTime, TimeIndexOps}, +}; + +#[derive(Debug)] +pub struct MemEdgeEntry<'a, MES> { + pos: LocalPOS, + es: MES, + __marker: std::marker::PhantomData<&'a ()>, +} + +impl<'a, MES: std::ops::Deref> MemEdgeEntry<'a, MES> { + pub fn new(pos: LocalPOS, es: MES) -> Self { + Self { + pos, + es, + __marker: std::marker::PhantomData, + } + } +} + +impl<'a, MES: std::ops::Deref + Send + Sync> EdgeEntryOps<'a> + for MemEdgeEntry<'a, MES> +{ + type Ref<'b> + = MemEdgeRef<'b> + where + 'a: 'b, + MES: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b, + { + MemEdgeRef { + pos: self.pos, + es: &self.es, + } + } +} + +#[derive(Copy, Clone, Debug)] +pub struct MemEdgeRef<'a> { + pos: LocalPOS, + es: &'a MemEdgeSegment, +} + +impl<'a> MemEdgeRef<'a> { + pub fn new(pos: LocalPOS, es: &'a MemEdgeSegment) -> Self { + Self { pos, es } + } + + pub fn has_layers(&self, layer_ids: &Multiple) -> bool { + layer_ids.iter().any(|layer_id| { + self.es + .as_ref() + .get(layer_id.0) + .is_some_and(|layer| layer.has_item(self.pos)) + }) + } +} + +impl<'a> WithTimeCells<'a> for MemEdgeRef<'a> { + type TimeCell = MemAdditions<'a>; + + fn t_props_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, 
EventTime)>, + ) -> impl Iterator + 'a { + self.es + .as_ref() + .get(layer_id.0) + .map(|layer| MemAdditions::Props(layer.times_from_props(self.pos))) + .into_iter() + .map(move |t_props| { + range + .map(|(start, end)| t_props.range(start..end)) + .unwrap_or_else(|| t_props) + }) + } + + fn additions_tc( + self, + _layer_id: LayerId, + _range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + std::iter::empty() + } + + fn deletions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + let deletions = self + .es + .as_ref() + .get(layer_id.0) + .map(|layer| layer.deletions(self.pos)) + .unwrap_or(&TCell::Empty); + let t_cell = MemAdditions::Edges(deletions); + std::iter::once( + range + .map(|(start, end)| t_cell.range(start..end)) + .unwrap_or_else(|| t_cell), + ) + } + + fn num_layers(&self) -> usize { + self.es.as_ref().len() + } +} + +impl<'a> WithTProps<'a> for MemEdgeRef<'a> { + type TProp = TPropCell<'a>; + + fn num_layers(&self) -> usize { + self.es.as_ref().len() + } + + fn into_t_props( + self, + layer_id: LayerId, + prop_id: usize, + ) -> impl Iterator + 'a { + let edge_pos = self.pos; + self.es + .as_ref() + .get(layer_id.0) + .into_iter() + .flat_map(move |layer| layer.t_prop(edge_pos, prop_id).into_iter()) + } +} + +impl<'a> EdgeRefOps<'a> for MemEdgeRef<'a> { + type Additions = EdgeAdditions<'a>; + type Deletions = EdgeDeletions<'a>; + type TProps = EdgeTProps<'a>; + + fn edge(self, layer_id: LayerId) -> Option<(VID, VID)> { + self.es + .as_ref() + .get(layer_id.0)? + .get(self.pos) + .map(|entry| (entry.src, entry.dst)) + } + + fn layer_additions(self, layer_id: LayerId) -> Self::Additions { + EdgeAdditions::new_with_layer(AdditionCellsRef::new(self), layer_id.0) + } + + fn layer_deletions(self, layer_id: LayerId) -> Self::Deletions { + EdgeDeletions::new_with_layer(DeletionCellsRef::new(self), layer_id.0) + } + + fn c_prop(self, layer_id: LayerId, prop_id: usize) -> Option { + self.es.as_ref().get(layer_id.0)?.c_prop(self.pos, prop_id) + } + + fn layer_t_prop(self, layer_id: LayerId, prop_id: usize) -> Self::TProps { + EdgeTProps::new_with_layer(self, layer_id, prop_id) + } + + fn src(&self) -> Option { + self.es.as_ref()[0].get(self.pos).map(|entry| entry.src) + } + + fn dst(&self) -> Option { + self.es.as_ref()[0].get(self.pos).map(|entry| entry.dst) + } + + fn edge_id(&self) -> EID { + let segment_id = self.es.as_ref()[0].segment_id(); + let max_page_len = self.es.as_ref()[0].max_page_len(); + self.pos.as_eid(segment_id, max_page_len) + } + + fn internal_num_layers(self) -> usize { + self.es.as_ref().len() + } +} diff --git a/db4-storage/src/segments/edge/mod.rs b/db4-storage/src/segments/edge/mod.rs new file mode 100644 index 0000000000..d0b743bd85 --- /dev/null +++ b/db4-storage/src/segments/edge/mod.rs @@ -0,0 +1,2 @@ +pub mod entry; +pub mod segment; diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs new file mode 100644 index 0000000000..2c4384146d --- /dev/null +++ b/db4-storage/src/segments/edge/segment.rs @@ -0,0 +1,786 @@ +use crate::{ + LocalPOS, + api::edges::{EdgeSegmentOps, LockedESegment}, + error::StorageError, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, + properties::PropMutEntry, + segments::{ + HasRow, SegmentContainer, + edge::entry::{MemEdgeEntry, MemEdgeRef}, + }, + utils::Iter4, + wal::LSN, +}; +use parking_lot::lock_api::ArcRwLockReadGuard; +use raphtory_api::core::{ + entities::{ + LayerId, VID, + properties::{ + 
meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::AsPropRef, + }, + }, + storage::dict_mapper::MaybeNew, +}; +use raphtory_api_macros::box_on_debug_lifetime; +use raphtory_core::{ + entities::LayerIds, + storage::timeindex::{AsTime, EventTime}, +}; +use rayon::prelude::*; +use std::{ + ops::{Deref, DerefMut}, + path::PathBuf, + sync::{ + Arc, + atomic::{self, AtomicU32, AtomicUsize, Ordering}, + }, +}; + +#[derive(Debug, Default)] +pub struct EdgeEntry { + pub src: VID, + pub dst: VID, + pub row: usize, +} + +impl HasRow for EdgeEntry { + fn row(&self) -> usize { + self.row + } + + fn row_mut(&mut self) -> &mut usize { + &mut self.row + } +} + +#[derive(Debug)] +pub struct MemEdgeSegment { + layers: Vec>, + est_size: usize, + global_memory_tracker: Arc, + lsn: LSN, +} + +impl AsRef<[SegmentContainer]> for MemEdgeSegment { + fn as_ref(&self) -> &[SegmentContainer] { + &self.layers + } +} + +impl AsMut<[SegmentContainer]> for MemEdgeSegment { + fn as_mut(&mut self) -> &mut [SegmentContainer] { + &mut self.layers + } +} + +impl MemEdgeSegment { + pub fn new( + segment_id: usize, + max_page_len: u32, + meta: Arc, + global_memory_tracker: Arc, + ) -> Self { + Self { + layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + est_size: 0, + global_memory_tracker, + lsn: 0, + } + } + + pub fn increment_global_memory(&self, increment: usize) { + self.global_memory_tracker + .fetch_add(increment, Ordering::Relaxed); + } + + pub fn edge_meta(&self) -> &Arc { + self.layers[0].meta() + } + + pub fn swap_out_layers(&mut self) -> Vec> { + let layers = self + .as_mut() + .iter_mut() + .map(|head_guard| { + let mut old_head = SegmentContainer::new( + head_guard.segment_id(), + head_guard.max_page_len(), + head_guard.meta().clone(), + ); + std::mem::swap(&mut *head_guard, &mut old_head); + old_head + }) + .collect::>(); + self.est_size = 0; // Reset estimated size after swapping out layers + layers + } + + pub fn get_or_create_layer(&mut self, layer_id: LayerId) -> &mut SegmentContainer { + let layer_id = layer_id.0; + if layer_id >= self.layers.len() { + let max_page_len = self.layers[0].max_page_len(); + let segment_id = self.layers[0].segment_id(); + let meta = self.layers[0].meta().clone(); + self.layers.resize_with(layer_id + 1, || { + SegmentContainer::new(segment_id, max_page_len, meta.clone()) + }); + } + &mut self.layers[layer_id] + } + + pub fn get_layer(&self, layer_id: LayerId) -> Option<&SegmentContainer> { + self.layers.get(layer_id.0) + } + + pub fn est_size(&self) -> usize { + self.est_size + } + + pub fn lsn(&self) -> u64 { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: u64) { + self.lsn = lsn; + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + let est_size = self.est_size(); + self.est_size = 0; + + Self { + layers, + est_size, + global_memory_tracker: self.global_memory_tracker.clone(), + lsn: self.lsn, + } + } + + pub fn max_page_len(&self) -> u32 { + self.layers[0].max_page_len() + } + + pub fn get_edge(&self, edge_pos: LocalPOS, layer_id: LayerId) -> Option<(VID, VID)> { + self.layers + .get(layer_id.0)? 
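+            // A missing layer means the edge was never written there, so `?` yields None.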
+ .get(edge_pos) + .map(|entry| (entry.src, entry.dst)) + } + + /// insert an edge + /// + /// returns a boolean flag indicating if the edge is new + pub fn insert_edge_internal( + &mut self, + t: T, + edge_pos: LocalPOS, + src: VID, + dst: VID, + layer_id: LayerId, + props: impl IntoIterator, + ) -> bool { + // Ensure we have enough layers + self.ensure_layer(layer_id); + let est_size = self.layers[layer_id.0].est_size(); + + let (local_row, is_new) = self + .reserve_local_row(edge_pos, src, dst, layer_id) + .into_inner_with_status(); + + let mut prop_entry: PropMutEntry<'_> = self.layers[layer_id.0] + .properties_mut() + .get_mut_entry(local_row); + + let ts = EventTime::new(t.t(), t.i()); + prop_entry.append_t_props(ts, props); + + let layer_est_size = self.layers[layer_id.0].est_size(); + self.est_size += layer_est_size.saturating_sub(est_size); + is_new + } + + /// delete an edge + /// + /// returns a boolean flag indicating if the edge is new + pub fn delete_edge_internal( + &mut self, + t: T, + edge_pos: LocalPOS, + src: VID, + dst: VID, + layer_id: LayerId, + ) -> bool { + let t = EventTime::new(t.t(), t.i()); + + // Ensure we have enough layers + self.ensure_layer(layer_id); + let est_size = self.layers[layer_id.0].est_size(); + + let (local_row, is_new) = self + .reserve_local_row(edge_pos, src, dst, layer_id) + .into_inner_with_status(); + let props = self.layers[layer_id.0].properties_mut(); + props.get_mut_entry(local_row).deletion_timestamp(t, None); + let layer_est_size = self.layers[layer_id.0].est_size(); + self.est_size += layer_est_size.saturating_sub(est_size); + is_new + } + + /// add static edge + /// + /// returns flag indicating if edge is new + pub fn insert_static_edge_internal( + &mut self, + edge_pos: LocalPOS, + src: impl Into, + dst: impl Into, + layer_id: LayerId, + ) -> bool { + let src = src.into(); + let dst = dst.into(); + + // Ensure we have enough layers + self.ensure_layer(layer_id); + let est_size = self.layers[layer_id.0].est_size(); + + let is_new = self + .reserve_local_row(edge_pos, src, dst, layer_id) + .is_new(); + let layer_est_size = self.layers[layer_id.0].est_size(); + self.est_size += layer_est_size.saturating_sub(est_size); + is_new + } + + fn ensure_layer(&mut self, layer_id: LayerId) { + let layer_id = layer_id.0; + if layer_id >= self.layers.len() { + // Get details from first layer to create consistent new layers. 
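+            // Layer 0 is created in `new`, so in practice `first()` always succeeds here.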
+ if let Some(first_layer) = self.layers.first() { + let segment_id = first_layer.segment_id(); + let max_page_len = first_layer.max_page_len(); + let meta = first_layer.meta().clone(); + + // Extend with new layers + while self.layers.len() <= layer_id { + self.layers.push(SegmentContainer::new( + segment_id, + max_page_len, + meta.clone(), + )); + } + } + } + } + + fn reserve_local_row( + &mut self, + edge_pos: LocalPOS, + src: impl Into, + dst: impl Into, + layer_id: LayerId, + ) -> MaybeNew { + let src = src.into(); + let dst = dst.into(); + + let mut row = self.layers[layer_id.0].reserve_local_row(edge_pos); + let inner = row.as_mut().inner(); + inner.src = src; + inner.dst = dst; + row.map(|row| row.row) + } + + pub fn check_metadata( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + props: &[(usize, P)], + ) -> Result<(), StorageError> { + if let Some(layer) = self.layers.get(layer_id.0) { + layer.check_metadata(edge_pos, props)?; + } + + Ok(()) + } + + pub fn update_const_properties( + &mut self, + edge_pos: LocalPOS, + src: VID, + dst: VID, + layer_id: LayerId, + props: impl IntoIterator, + ) { + // Ensure we have enough layers + self.ensure_layer(layer_id); + let est_size = self.layers[layer_id.0].est_size(); + let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id).inner(); + let mut prop_entry: PropMutEntry<'_> = self.layers[layer_id.0] + .properties_mut() + .get_mut_entry(local_row); + prop_entry.append_const_props(props); + + let layer_est_size = self.layers[layer_id.0].est_size() + 8; + self.est_size += layer_est_size.saturating_sub(est_size); + } + + pub fn has_edge(&self, edge_pos: LocalPOS, layer_id: LayerId) -> bool { + self.layers + .get(layer_id.0) + .is_some_and(|layer| layer.has_item(edge_pos)) + } + + pub fn latest(&self) -> Option { + Iterator::max(self.layers.iter().filter_map(|seg| seg.latest())) + } + + pub fn earliest(&self) -> Option { + Iterator::min(self.layers.iter().filter_map(|seg| seg.earliest())) + } + + pub fn t_len(&self) -> usize { + self.layers.iter().map(|seg| seg.t_len()).sum() + } +} + +impl Drop for MemEdgeSegment { + fn drop(&mut self) { + self.global_memory_tracker + .fetch_sub(self.est_size, Ordering::Relaxed); + } +} + +// Update EdgeSegmentView implementation to use multiple layers +#[derive(Debug)] +pub struct EdgeSegmentView { + segment: Arc>, + segment_id: usize, + num_edges: AtomicU32, + ext: EXT, +} + +#[derive(Debug)] +pub struct ArcLockedSegmentView { + inner: ArcRwLockReadGuard, + num_edges: u32, +} + +impl ArcLockedSegmentView { + fn edge_iter_layer<'a>( + &'a self, + layer_id: LayerId, + ) -> impl Iterator> + Send + Sync + 'a { + self.inner + .layers + .get(layer_id.0) + .into_iter() + .flat_map(|layer| layer.filled_positions()) + .map(move |pos| MemEdgeRef::new(pos, &self.inner)) + } + + fn edge_par_iter_layer<'a>( + &'a self, + layer_id: LayerId, + ) -> impl ParallelIterator> + 'a { + self.inner + .layers + .get(layer_id.0) + .into_par_iter() + .flat_map(|layer| layer.filled_positions_par()) + .map(move |pos| MemEdgeRef::new(pos, &self.inner)) + } +} + +impl LockedESegment for ArcLockedSegmentView { + type EntryRef<'a> = MemEdgeRef<'a>; + + fn entry_ref<'a>(&'a self, edge_pos: impl Into) -> Self::EntryRef<'a> + where + Self: 'a, + { + let edge_pos = edge_pos.into(); + MemEdgeRef::new(edge_pos, &self.inner) + } + + #[box_on_debug_lifetime] + fn edge_iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl Iterator> + Send + Sync + 'a { + match layer_ids { + LayerIds::None => 
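+            // Iter4 unifies the four arms into a single concrete iterator type.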
Iter4::I(std::iter::empty()), + LayerIds::All => Iter4::J(self.edge_iter_layer(STATIC_GRAPH_LAYER_ID)), + LayerIds::One(layer_id) => Iter4::K(self.edge_iter_layer(*layer_id)), + LayerIds::Multiple(multiple) => Iter4::L( + self.edge_iter_layer(STATIC_GRAPH_LAYER_ID) + .filter(|pos| pos.has_layers(multiple)), + ), + } + } + + fn edge_par_iter<'a, 'b: 'a>( + &'a self, + layer_ids: &'b LayerIds, + ) -> impl ParallelIterator> + 'a { + match layer_ids { + LayerIds::None => Iter4::I(rayon::iter::empty()), + LayerIds::All => Iter4::J(self.edge_par_iter_layer(STATIC_GRAPH_LAYER_ID)), + LayerIds::One(layer_id) => Iter4::K(self.edge_par_iter_layer(*layer_id)), + LayerIds::Multiple(multiple) => Iter4::L( + self.edge_par_iter_layer(STATIC_GRAPH_LAYER_ID) + .filter(|pos| pos.has_layers(multiple)), + ), + } + } + + fn num_edges(&self) -> u32 { + self.num_edges + } +} + +impl>> EdgeSegmentOps for EdgeSegmentView
{ + type Extension = P; + + type Entry<'a> = MemEdgeEntry<'a, parking_lot::RwLockReadGuard<'a, MemEdgeSegment>>; + + type ArcLockedSegment = ArcLockedSegmentView; + + fn extension(&self) -> &Self::Extension { + &self.ext + } + + fn latest(&self) -> Option { + self.head().latest() + } + + fn earliest(&self) -> Option { + self.head().earliest() + } + + fn t_len(&self) -> usize { + self.head().t_len() + } + + fn num_layers(&self) -> usize { + self.head().layers.len() + } + + fn layer_count(&self, layer_id: LayerId) -> u32 { + self.head() + .get_layer(layer_id) + .map_or(0, |layer| layer.len()) + } + + fn load( + _page_id: usize, + _max_page_len: u32, + _meta: Arc, + _path: impl AsRef, + _ext: Self::Extension, + ) -> Result + where + Self: Sized, + { + Err(StorageError::GenericFailure( + "load not supported".to_string(), + )) + } + + fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { + let max_page_len = ext.config().max_edge_page_len(); + + Self { + segment: parking_lot::RwLock::new(MemEdgeSegment::new( + page_id, + max_page_len, + meta, + ext.memory_tracker().clone(), + )) + .into(), + segment_id: page_id, + num_edges: AtomicU32::new(0), + ext, + } + } + + fn segment_id(&self) -> usize { + self.segment_id + } + + fn edges_counter(&self) -> &AtomicU32 { + &self.num_edges + } + + fn head(&self) -> parking_lot::RwLockReadGuard<'_, MemEdgeSegment> { + self.segment.read_recursive() + } + + fn head_arc(&self) -> ArcRwLockReadGuard { + self.segment.read_arc_recursive() + } + + fn head_mut(&self) -> parking_lot::RwLockWriteGuard<'_, MemEdgeSegment> { + self.segment.write() + } + + fn try_head_mut(&self) -> Option> { + self.segment.try_write() + } + + fn set_dirty(&self, _dirty: bool) {} + + fn is_dirty(&self) -> bool { + true + } + + fn notify_write( + &self, + _head_lock: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } + + fn increment_num_edges(&self) -> u32 { + self.num_edges.fetch_add(1, atomic::Ordering::Relaxed) + } + + fn has_edge( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: impl Deref, + ) -> bool { + locked_head.has_edge(edge_pos, layer_id) + } + + fn immut_has_edge(&self, _edge_pos: LocalPOS, _layer_id: LayerId) -> bool { + false + } + + fn get_edge( + &self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: impl Deref, + ) -> Option<(VID, VID)> { + locked_head.get_edge(edge_pos, layer_id) + } + + fn entry<'a>(&'a self, edge_pos: LocalPOS) -> Self::Entry<'a> { + MemEdgeEntry::new(edge_pos, self.head()) + } + + fn layer_entry<'a>( + &'a self, + edge_pos: LocalPOS, + layer_id: LayerId, + locked_head: Option>, + ) -> Option> { + locked_head.and_then(|locked_head| { + let layer = locked_head.as_ref().get(layer_id.0)?; + layer + .has_item(edge_pos) + .then(|| MemEdgeEntry::new(edge_pos, locked_head)) + }) + } + + fn locked(self: &Arc) -> Self::ArcLockedSegment { + ArcLockedSegmentView { + inner: self.head_arc(), + num_edges: self.num_edges(), + } + } + + fn vacuum( + &self, + _locked_head: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } + + fn immut_lsn(&self) -> LSN { + 0 + } + + fn flush(&self) -> Result<(), StorageError> { + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + Config, + pages::{edge_page::writer::EdgeWriter, layer_counter::GraphStats}, + persist::strategy::NoOpStrategy, + }; + use raphtory_api::core::entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::{Prop, PropType}, + }; + use raphtory_core::storage::timeindex::EventTime; + + fn 
create_test_segment() -> MemEdgeSegment { + let meta = Arc::new(Meta::default()); + MemEdgeSegment::new(1, 100, meta, Arc::new(AtomicUsize::new(0))) + } + + #[test] + fn test_insert_edge_internal_baseline() { + let mut segment = create_test_segment(); + + // Insert a few edges using insert_edge_internal + segment.insert_edge_internal( + EventTime::new(1, 0), + LocalPOS(0), + VID(1), + VID(2), + LayerId(0), + vec![(0, Prop::from("test1"))], + ); + + segment.insert_edge_internal( + EventTime::new(2, 1), + LocalPOS(1), + VID(3), + VID(4), + LayerId(0), + vec![(0, Prop::from("test2"))], + ); + + segment.insert_edge_internal( + EventTime::new(3, 2), + LocalPOS(2), + VID(5), + VID(6), + LayerId(0), + vec![(0, Prop::from("test3"))], + ); + + // Verify edges exist + assert!(segment.has_edge(LocalPOS(0), LayerId(0))); + assert!(segment.has_edge(LocalPOS(1), LayerId(0))); + assert!(segment.has_edge(LocalPOS(2), LayerId(0))); + + // Verify edge data + assert_eq!( + segment.get_edge(LocalPOS(0), LayerId(0)), + Some((VID(1), VID(2))) + ); + assert_eq!( + segment.get_edge(LocalPOS(1), LayerId(0)), + Some((VID(3), VID(4))) + ); + assert_eq!( + segment.get_edge(LocalPOS(2), LayerId(0)), + Some((VID(5), VID(6))) + ); + + // Verify time length increased + assert_eq!(segment.t_len(), 3); + } + + #[test] + fn est_size_changes() { + let meta = Arc::new(Meta::default()); + let ext = NoOpStrategy::new(Config::default(), None).unwrap(); + let stats = GraphStats::new(); + let segment = EdgeSegmentView::new(1, meta.clone(), None, ext.clone()); + let head = segment.head_mut(); + let mut writer = EdgeWriter::new(&stats, &segment, head); + assert_eq!(writer.writer.est_size(), 0); + writer.add_edge( + EventTime::new(1, 0), + LocalPOS(0), + VID(1), + VID(2), + vec![(0, Prop::from("test"))], + STATIC_GRAPH_LAYER_ID, + ); + + let est_size1 = writer.writer.est_size(); + + assert!(est_size1 > 0); + writer.delete_edge( + EventTime::new(2, 3), + LocalPOS(0), + VID(5), + VID(3), + STATIC_GRAPH_LAYER_ID, + ); + + let est_size2 = writer.writer.est_size(); + + assert!( + est_size2 > est_size1, + "Expected size to increase after deletion, but it did not." + ); + + // same edge insertion again to check size increase + writer.add_edge( + EventTime::new(3, 0), + LocalPOS(1), + VID(4), + VID(6), + vec![(0, Prop::from("test2"))], + STATIC_GRAPH_LAYER_ID, + ); + + let est_size3 = writer.writer.est_size(); + assert!( + est_size3 > est_size2, + "Expected size to increase after re-insertion, but it did not." + ); + + // Insert a static edge + writer.add_static_edge(Some(LocalPOS(1)), 4, 6, false); + + let est_size4 = writer.writer.est_size(); + assert_eq!( + est_size4, est_size3, + "Expected size to remain the same after static edge insertion, but it changed." + ); + + let prop_id = meta + .metadata_mapper() + .get_or_create_and_validate("a", PropType::U8) + .unwrap() + .inner(); + + writer.update_c_props( + LocalPOS(1), + VID(4), + VID(6), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U8(2))], + ); + + let est_size5 = writer.writer.est_size(); + assert!( + est_size5 > est_size4, + "Expected size to increase after updating properties, but it did not." + ); + + // update const properties for the other edge, hard to predict size change + // segment.update_const_properties(LocalPOS(0), 1, 2, 0, [(prop_id, Prop::U8(3))]); + + // let est_size6 = segment.est_size(); + // assert!( + // est_size6 > est_size5, + // "Expected size to increase after updating properties for the other edge, but it did not." 
+ // ); + + drop(writer); + // global size should be the last size of the writer after drop + assert_eq!(ext.estimated_size(), est_size5); + drop(segment); + // global size should be 0 after the segment is dropped + assert_eq!(ext.estimated_size(), 0); + } +} diff --git a/db4-storage/src/segments/graph_prop/entry.rs b/db4-storage/src/segments/graph_prop/entry.rs new file mode 100644 index 0000000000..2fa04494a6 --- /dev/null +++ b/db4-storage/src/segments/graph_prop/entry.rs @@ -0,0 +1,79 @@ +use crate::{ + GraphTProps, + api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, + generic_t_props::WithTProps, + segments::graph_prop::segment::MemGraphPropSegment, +}; +use parking_lot::RwLockReadGuard; +use raphtory_api::core::entities::{LayerId, properties::prop::Prop}; +use raphtory_core::entities::properties::tprop::TPropCell; +use std::ops::Deref; + +/// A borrowed view enabling read operations on an in-memory graph segment. +pub struct MemGraphPropEntry<'a> { + mem: RwLockReadGuard<'a, MemGraphPropSegment>, +} + +impl<'a> MemGraphPropEntry<'a> { + pub fn new(mem: RwLockReadGuard<'a, MemGraphPropSegment>) -> Self { + Self { mem } + } +} + +impl<'a> GraphPropEntryOps<'a> for MemGraphPropEntry<'a> { + type Ref<'b> + = MemGraphPropRef<'b> + where + 'a: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b, + { + MemGraphPropRef { + mem: self.mem.deref(), + } + } +} + +/// A lightweight, copyable reference to graph properties. +#[derive(Copy, Clone, Debug)] +pub struct MemGraphPropRef<'a> { + mem: &'a MemGraphPropSegment, +} + +impl<'a> MemGraphPropRef<'a> { + pub fn new(mem: &'a MemGraphPropSegment) -> Self { + Self { mem } + } +} + +impl<'a> WithTProps<'a> for MemGraphPropRef<'a> { + type TProp = TPropCell<'a>; + + fn num_layers(&self) -> usize { + // TODO: Support multiple layers for graph props. + 1 + } + + fn into_t_props( + self, + _layer_id: LayerId, + prop_id: usize, + ) -> impl Iterator + Send + Sync + 'a { + // TODO: Support multiple layers for graph props. + self.mem.get_temporal_prop(prop_id).into_iter() + } +} + +impl<'a> GraphPropRefOps<'a> for MemGraphPropRef<'a> { + type TProps = GraphTProps<'a>; + + fn get_temporal_prop(self, prop_id: usize) -> Self::TProps { + GraphTProps::new_with_layer(self, MemGraphPropSegment::DEFAULT_LAYER, prop_id) + } + + fn get_metadata(self, prop_id: usize) -> Option { + self.mem.get_metadata(prop_id) + } +} diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs new file mode 100644 index 0000000000..ec97be2422 --- /dev/null +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -0,0 +1,101 @@ +pub mod entry; +pub mod segment; + +use crate::{ + api::graph_props::GraphPropSegmentOps, + error::StorageError, + persist::strategy::PersistenceStrategy, + segments::graph_prop::{entry::MemGraphPropEntry, segment::MemGraphPropSegment}, + wal::LSN, +}; +use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use raphtory_api::core::entities::properties::meta::Meta; +use std::{ + path::Path, + sync::{ + Arc, + atomic::{AtomicBool, AtomicUsize, Ordering}, + }, +}; + +/// `GraphPropSegmentView` manages graph temporal properties and graph metadata +/// (constant properties). Reads / writes are always served from the in-memory segment. +#[derive(Debug)] +pub struct GraphPropSegmentView { + /// In-memory segment that contains the latest graph properties + /// and graph metadata writes. + head: Arc>, + + /// Estimated size of the segment in bytes. 
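+    /// Only grown via `increment_est_size`; relaxed ordering suffices because
+    /// the value is a size estimate, not a synchronisation point.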
+ est_size: AtomicUsize, + + is_dirty: AtomicBool, + + _persistent: P, +} + +impl GraphPropSegmentOps for GraphPropSegmentView
{ + type Extension = P; + + type Entry<'a> = MemGraphPropEntry<'a>; + + fn new(meta: Arc, _path: Option<&Path>, ext: Self::Extension) -> Self { + Self { + head: Arc::new(RwLock::new(MemGraphPropSegment::new_with_meta(meta))), + est_size: AtomicUsize::new(0), + is_dirty: AtomicBool::new(false), + _persistent: ext, + } + } + + fn load( + _meta: Arc, + _path: impl AsRef, + _ext: Self::Extension, + ) -> Result { + Err(StorageError::GenericFailure( + "load not supported".to_string(), + )) + } + + fn head(&self) -> RwLockReadGuard<'_, MemGraphPropSegment> { + self.head.read() + } + + fn head_mut(&self) -> RwLockWriteGuard<'_, MemGraphPropSegment> { + self.head.write() + } + + fn entry(&self) -> Self::Entry<'_> { + let head = self.head.read(); + + MemGraphPropEntry::new(head) + } + + fn increment_est_size(&self, size: usize) { + self.est_size.fetch_add(size, Ordering::Relaxed); + } + + fn est_size(&self) -> usize { + self.est_size.load(Ordering::Relaxed) + } + + fn set_dirty(&self, dirty: bool) { + self.is_dirty.store(dirty, Ordering::Release); + } + + fn immut_lsn(&self) -> LSN { + 0 + } + + fn notify_write( + &self, + _mem_segment: &mut RwLockWriteGuard<'_, MemGraphPropSegment>, + ) -> Result<(), StorageError> { + Ok(()) + } + + fn flush(&self) -> Result<(), StorageError> { + Ok(()) + } +} diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs new file mode 100644 index 0000000000..a0d5eb6d28 --- /dev/null +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -0,0 +1,162 @@ +use crate::{ + error::StorageError, + segments::{HasRow, SegmentContainer}, + wal::LSN, +}; +use raphtory_api::core::entities::{ + LayerId, + properties::{ + meta::Meta, + prop::{AsPropRef, Prop}, + }, +}; +use raphtory_core::{ + entities::properties::tprop::TPropCell, + storage::timeindex::{AsTime, EventTime}, +}; +use std::sync::Arc; + +/// In-memory segment that contains graph temporal properties and graph metadata. +#[derive(Debug)] +pub struct MemGraphPropSegment { + /// Layers containing graph properties and metadata. + layers: Vec>, + lsn: LSN, +} + +/// A unit-like struct for use with `SegmentContainer`. +/// Graph properties and metadata are already stored in `SegmentContainer`, +/// hence this struct is empty. +#[derive(Debug, Default)] +pub struct UnitEntry(usize); + +// UnitEntry does not store data, but HasRow has to be implemented +// for SegmentContainer to work. +impl HasRow for UnitEntry { + fn row(&self) -> usize { + self.0 + } + + fn row_mut(&mut self) -> &mut usize { + &mut self.0 + } +} + +impl MemGraphPropSegment { + /// Graph segments only have a single row. + pub const DEFAULT_ROW: usize = 0; + + /// Graph segments are currently only written to a single layer. + pub const DEFAULT_LAYER: LayerId = LayerId(0); + + pub fn new_with_meta(meta: Arc) -> Self { + // Technically, these aren't used since there is always only one graph segment. 
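+        // max_page_len must still be non-zero: SegmentContainer::new asserts it.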
+ let segment_id = 0; + let max_page_len = 1; + + Self { + layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, + } + } + + pub fn get_or_create_layer(&mut self, layer_id: LayerId) -> &mut SegmentContainer { + let layer_id = layer_id.0; + if layer_id >= self.layers.len() { + let max_page_len = self.layers[0].max_page_len(); + let segment_id = self.layers[0].segment_id(); + let meta = self.layers[0].meta().clone(); + self.layers.resize_with(layer_id + 1, || { + SegmentContainer::new(segment_id, max_page_len, meta.clone()) + }); + } + &mut self.layers[layer_id] + } + + pub fn layers(&self) -> &Vec> { + &self.layers + } + + pub fn layers_mut(&mut self) -> &mut Vec> { + &mut self.layers + } + + pub fn is_empty(&self) -> bool { + self.layers.iter().all(|layer| layer.est_size() == 0) + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + layers, + lsn: self.lsn, + } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.lsn = lsn; + } + + pub fn add_properties( + &mut self, + t: T, + props: impl IntoIterator, + ) -> usize { + let layer = self.get_or_create_layer(Self::DEFAULT_LAYER); + let est_size = layer.est_size(); + let ts = EventTime::new(t.t(), t.i()); + + layer.reserve_local_row(Self::DEFAULT_ROW.into()); + let mut prop_mut_entry = layer.properties_mut().get_mut_entry(Self::DEFAULT_ROW); + prop_mut_entry.append_t_props(ts, props); + + let layer_est_size = layer.est_size(); + layer_est_size - est_size + } + + pub fn check_metadata(&self, props: &[(usize, P)]) -> Result<(), StorageError> { + if let Some(layer) = self.layers.get(Self::DEFAULT_LAYER.0) { + layer.check_metadata(Self::DEFAULT_ROW.into(), props)?; + } + + Ok(()) + } + + pub fn update_metadata( + &mut self, + props: impl IntoIterator, + ) -> usize { + let segment_container = self.get_or_create_layer(Self::DEFAULT_LAYER); + let est_size = segment_container.est_size(); + + let row = segment_container + .reserve_local_row(Self::DEFAULT_ROW.into()) + .map(|a| a.row()); + let row = row.inner(); + let mut prop_mut_entry = segment_container.properties_mut().get_mut_entry(row); + prop_mut_entry.append_const_props(props); + + let layer_est_size = segment_container.est_size(); + // random estimate for constant properties + (layer_est_size - est_size) + 8 + } + + pub fn get_temporal_prop(&self, prop_id: usize) -> Option> { + let layer = &self.layers[Self::DEFAULT_LAYER.0]; + layer.t_prop(Self::DEFAULT_ROW, prop_id) + } + + pub fn get_metadata(&self, prop_id: usize) -> Option { + let layer = &self.layers[Self::DEFAULT_LAYER.0]; + layer.c_prop(Self::DEFAULT_ROW, prop_id) + } +} diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs new file mode 100644 index 0000000000..222c8b2d71 --- /dev/null +++ b/db4-storage/src/segments/mod.rs @@ -0,0 +1,418 @@ +use super::properties::{PropEntry, Properties}; +use crate::{LocalPOS, error::StorageError}; +use raphtory_api::core::{ + entities::properties::{ + meta::Meta, + prop::{AsPropRef, Prop}, + }, + storage::dict_mapper::MaybeNew, +}; +use raphtory_core::{ + entities::{ + ELID, + properties::{tcell::TCell, tprop::TPropCell}, + }, + storage::timeindex::EventTime, +}; +use rayon::prelude::*; +use std::{ + fmt::{Debug, Formatter}, + iter, + sync::Arc, +}; + +pub 
mod edge; +pub mod graph_prop; +pub mod node; + +pub mod additions; + +pub type PageIndexT = u32; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct PageIndexEntry(PageIndexT); + +impl Default for PageIndexEntry { + fn default() -> Self { + PageIndexEntry(PageIndexT::MAX) + } +} + +impl PageIndexEntry { + fn index(self) -> Option { + (self.0 != PageIndexT::MAX).then_some(self.0 as usize) + } + + fn is_filled(self) -> bool { + self.0 != PageIndexT::MAX + } +} + +#[derive(Default)] +struct PageIndex(Vec); + +impl PageIndex { + fn get(&self, pos: LocalPOS) -> Option { + self.0.get(pos.as_index()).and_then(|index| index.index()) + } + + fn set(&mut self, pos: LocalPOS, index: PageIndexEntry) { + let pos_index = pos.as_index(); + if pos_index >= self.0.len() { + self.0.resize(pos_index + 1, PageIndexEntry::default()); + } + self.0[pos_index] = index; + } + + fn iter(&self) -> impl ExactSizeIterator> { + self.0.iter().map(|i| i.index()) + } + + fn filled_positions(&self) -> impl Iterator { + self.0 + .iter() + .enumerate() + .filter_map(|(i, p)| p.is_filled().then_some(LocalPOS::from(i))) + } + + fn par_iter(&self) -> impl IndexedParallelIterator> { + self.0.par_iter().map(|i| i.index()) + } +} + +#[derive(Default)] +struct SparseVec { + index: PageIndex, + data: Vec<(LocalPOS, T)>, + max_local_pos: Option, +} + +impl Debug for SparseVec { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.iter_filled()).finish() + } +} + +impl SparseVec { + fn get(&self, pos: LocalPOS) -> Option<&T> { + self.index + .get(pos) + .and_then(|i| self.data.get(i).map(|(_, x)| x)) + } + + fn is_filled(&self, pos: LocalPOS) -> bool { + self.index.get(pos).is_some() + } + + /// Iterator over filled positions. + /// + /// Note that this returns items in insertion order! + fn iter_filled(&self) -> impl Iterator { + self.data.iter().map(|(i, x)| (*i, x)) + } + + fn iter_all(&self) -> impl ExactSizeIterator> { + self.index.iter().map(|i| i.map(|i| &self.data[i].1)) + } + + fn max_local_pos(&self) -> Option { + self.max_local_pos + } + + fn num_filled(&self) -> usize { + self.data.len() + } +} + +impl SparseVec { + /// Parallel iterator over filled positions. + /// + /// Note that this returns items in insertion order! 
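+    /// When position order matters, use `SegmentContainer::row_entries_ordered`
+    /// (or `all_entries_par` for the parallel case) instead.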
+ fn par_iter_filled(&self) -> impl IndexedParallelIterator { + self.data.par_iter().map(|(i, x)| (*i, x)) + } + fn par_iter_all(&self) -> impl IndexedParallelIterator> { + self.index.par_iter().map(|i| i.map(|i| &self.data[i].1)) + } +} + +impl SparseVec { + fn get_or_new(&mut self, pos: LocalPOS) -> MaybeNew<&mut T> { + match self.index.get(pos) { + None => { + let next_index = self.data.len(); + self.data.push((pos, T::default())); + let new_entry = &mut self.data[next_index].1; + *new_entry.row_mut() = next_index; + self.index.set(pos, PageIndexEntry(next_index as u32)); + self.max_local_pos = self.max_local_pos.max(Some(pos)); + MaybeNew::New(new_entry) + } + Some(i) => MaybeNew::Existing(&mut self.data[i].1), + } + } +} + +#[derive(Debug)] +pub struct SegmentContainer { + segment_id: usize, + data: SparseVec, + max_page_len: u32, + properties: Properties, + meta: Arc, +} + +pub trait HasRow: Default + Send + Sync + Sized { + fn row(&self) -> usize; + + fn row_mut(&mut self) -> &mut usize; +} + +impl SegmentContainer { + pub fn new(segment_id: usize, max_page_len: u32, meta: Arc) -> Self { + assert!(max_page_len > 0, "max_page_len must be greater than 0"); + + Self { + segment_id, + data: Default::default(), + max_page_len, + properties: Default::default(), + meta, + } + } + + /// Replaces this container with an empty instance, returning the + /// old container with its data. + pub fn take(&mut self) -> Self { + std::mem::replace( + self, + Self::new(self.segment_id, self.max_page_len, self.meta.clone()), + ) + } + + #[inline] + pub fn est_size(&self) -> usize { + // TODO: this is a rough estimate and should be improved + let data_size = + (self.data.num_filled() as f64 * std::mem::size_of::() as f64 * 1.5) as usize; // Estimate size of data + let timestamp_size = std::mem::size_of::(); + (self.properties.additions_count * timestamp_size) + + data_size + + self.t_prop_est_size() + + self.c_prop_est_size() + } + + pub fn get(&self, item_pos: LocalPOS) -> Option<&T> { + self.data.get(item_pos) + } + + pub fn has_item(&self, item_pos: LocalPOS) -> bool { + self.data.is_filled(item_pos) + } + + pub fn max_page_len(&self) -> u32 { + self.max_page_len + } + + pub fn max_rows(&self) -> usize { + self.data.max_local_pos().map(|pos| pos.0 + 1).unwrap_or(0) as usize + } + + pub fn is_full(&self) -> bool { + self.data.num_filled() == self.max_page_len() as usize + } + + pub fn t_len(&self) -> usize { + self.properties.t_len() + } + + /// Reserves a local row for the given item position. + /// If the item position already exists, it returns a mutable reference to the existing item. + /// Left variant indicates that the item was already present, + /// Right variant indicates that a new item was created. 
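+    /// (With the current `MaybeNew` return type these correspond to
+    /// `MaybeNew::Existing` and `MaybeNew::New` respectively.)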
+ pub(crate) fn reserve_local_row(&mut self, item_pos: LocalPOS) -> MaybeNew<&mut T> { + self.data.get_or_new(item_pos) + } + + #[inline] + pub fn t_prop_est_size(&self) -> usize { + let row_size = self.meta.temporal_est_row_size(); + let row_count = self.properties.t_len(); + + row_size * row_count + } + + pub(crate) fn c_prop_est_size(&self) -> usize { + self.meta.const_est_row_size() * self.len() as usize + } + + pub fn properties(&self) -> &Properties { + &self.properties + } + + pub fn properties_mut(&mut self) -> &mut Properties { + &mut self.properties + } + + pub fn check_metadata( + &self, + local_pos: LocalPOS, + props: &[(usize, P)], + ) -> Result<(), StorageError> { + if let Some(item) = self.get(local_pos) { + let local_row = item.row(); + let prop_entry = self.properties().get_entry(local_row); + + for (prop_id, prop_val) in props { + prop_entry.check_metadata(*prop_id, prop_val.as_prop_ref())?; + } + } + + Ok(()) + } + + pub fn meta(&self) -> &Arc { + &self.meta + } + + pub fn filled_positions(&self) -> impl Iterator { + self.data.index.filled_positions() + } + + pub fn filled_positions_par(&self) -> impl ParallelIterator { + self.data.par_iter_filled().map(|(i, _)| i) + } + + #[inline(always)] + pub fn segment_id(&self) -> usize { + self.segment_id + } + + pub fn len(&self) -> u32 { + self.data.data.len() as u32 + } + + pub fn is_empty(&self) -> bool { + self.data.data.is_empty() + } + + /// returns items in insertion order! + pub fn row_entries(&self) -> impl Iterator)> { + self.data + .iter_filled() + .map(|(l_pos, entry)| (l_pos, entry, self.properties().get_entry(entry.row()))) + } + + /// return filled entries ordered by index + pub fn row_entries_ordered(&self) -> impl Iterator)> { + self.all_entries().filter_map(|(pos, entry)| { + let (v, row) = entry?; + Some((pos, v, row)) + }) + } + + pub fn all_entries(&self) -> impl Iterator)>)> { + let max_local_pos = self.data.max_local_pos().map(|p| p.0 as usize).unwrap_or(0); + self.data + .iter_all() + .chain(iter::repeat(None)) + .take(max_local_pos + 1) + .enumerate() + .map(|(i, v)| { + ( + LocalPOS::from(i), + v.map(|v| (v, self.properties().get_entry(v.row()))), + ) + }) + } + + pub fn all_entries_par( + &self, + ) -> impl ParallelIterator)>)> + '_ { + self.data.par_iter_all().enumerate().map(|(i, v)| { + ( + LocalPOS::from(i), + v.map(|entry| (entry, self.properties().get_entry(entry.row()))), + ) + }) + } + + pub fn earliest(&self) -> Option { + self.properties.earliest() + } + + pub fn latest(&self) -> Option { + self.properties.latest() + } + + pub fn temporal_index(&self) -> Vec { + self.row_entries_ordered() + .flat_map(|(_, mp, _)| { + let row = mp.row(); + self.properties() + .times_from_props(row) + .into_iter() + .flat_map(|entry| entry.iter()) + .filter_map(|(_, &v)| v) + }) + .collect::>() + } + + pub fn t_prop(&self, item_id: impl Into, prop_id: usize) -> Option> { + let item_id = item_id.into(); + self.data.get(item_id).and_then(|entry| { + let prop_entry = self.properties.get_entry(entry.row()); + prop_entry.prop(prop_id) + }) + } + + pub fn t_prop_rows(&self, item_id: impl Into) -> &TCell> { + let item_id = item_id.into(); + self.data + .get(item_id) + .map(|entry| { + let prop_entry = self.properties.get_entry(entry.row()); + prop_entry.t_cell() + }) + .unwrap_or(&TCell::Empty) + } + + pub fn c_prop(&self, item_id: impl Into, prop_id: usize) -> Option { + let item_id = item_id.into(); + self.data.get(item_id).and_then(|entry| { + let prop_entry = self.properties.c_column(prop_id)?; + 
prop_entry.get(entry.row()) + }) + } + + pub fn c_prop_str(&self, item_id: impl Into, prop_id: usize) -> Option<&str> { + let item_id = item_id.into(); + self.data.get(item_id).and_then(|entry| { + let prop_entry = self.properties.c_column(prop_id)?; + prop_entry + .get_ref(entry.row()) + .and_then(|prop| prop.as_str()) + }) + } + + pub fn additions(&self, item_pos: LocalPOS) -> &TCell { + self.data + .get(item_pos) + .and_then(|entry| self.properties.additions(entry.row())) + .unwrap_or(&TCell::Empty) + } + + pub fn deletions(&self, item_pos: LocalPOS) -> &TCell { + self.data + .get(item_pos) + .and_then(|entry| self.properties.deletions(entry.row())) + .unwrap_or(&TCell::Empty) + } + + pub fn times_from_props(&self, item_pos: LocalPOS) -> &TCell> { + self.data + .get(item_pos) + .and_then(|entry| self.properties.times_from_props(entry.row())) + .unwrap_or(&TCell::Empty) + } +} diff --git a/db4-storage/src/segments/node/entry.rs b/db4-storage/src/segments/node/entry.rs new file mode 100644 index 0000000000..7d0bc872f0 --- /dev/null +++ b/db4-storage/src/segments/node/entry.rs @@ -0,0 +1,264 @@ +use crate::{ + LocalPOS, NodeEdgeAdditions, NodePropAdditions, NodeTProps, + api::nodes::{NodeEntryOps, NodeRefOps}, + gen_ts::{EdgeAdditionCellsRef, LayerIter, PropAdditionCellsRef, WithTimeCells}, + generic_t_props::WithTProps, + segments::{additions::MemAdditions, node::segment::MemNodeSegment}, +}; +use itertools::Itertools; +use raphtory_api::core::{ + Direction, + entities::{ + EID, LayerId, VID, + properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::Prop, + }, + }, +}; +use raphtory_core::{ + entities::{LayerIds, edges::edge_ref::EdgeRef, properties::tprop::TPropCell}, + storage::timeindex::{EventTime, TimeIndexOps}, +}; +use std::{ops::Deref, sync::Arc}; + +pub struct MemNodeEntry<'a, MNS> { + pos: LocalPOS, + ns: MNS, + __marker: std::marker::PhantomData<&'a ()>, +} + +impl<'a, MNS: Deref> MemNodeEntry<'a, MNS> { + pub fn new(pos: LocalPOS, ns: MNS) -> Self { + Self { + pos, + ns, + __marker: std::marker::PhantomData, + } + } +} + +impl<'a, MNS: Deref + Send + Sync + 'a> NodeEntryOps<'a> + for MemNodeEntry<'a, MNS> +{ + type Ref<'b> + = MemNodeRef<'b> + where + 'a: 'b, + MNS: 'b; + + fn as_ref<'b>(&'b self) -> Self::Ref<'b> + where + 'a: 'b, + { + MemNodeRef { + pos: self.pos, + ns: self.ns.deref(), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub struct MemNodeRef<'a> { + pos: LocalPOS, + ns: &'a MemNodeSegment, +} + +impl<'a> MemNodeRef<'a> { + pub fn new(pos: LocalPOS, ns: &'a MemNodeSegment) -> Self { + Self { pos, ns } + } +} + +impl<'a> WithTimeCells<'a> for MemNodeRef<'a> { + type TimeCell = MemAdditions<'a>; + + fn t_props_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.ns + .as_ref() + .get(layer_id.0) + .map(|seg| MemAdditions::Props(seg.times_from_props(self.pos))) + .into_iter() + .map(move |t_cell| { + range + .map(|(start, end)| t_cell.range(start..end)) + .unwrap_or_else(|| t_cell) + }) + } + + fn additions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.ns + .as_ref() + .get(layer_id.0) + .map(|seg| MemAdditions::Edges(seg.additions(self.pos))) + .into_iter() + .map(move |t_cell| { + range + .map(|(start, end)| t_cell.range(start..end)) + .unwrap_or_else(|| t_cell) + }) + } + + fn deletions_tc( + self, + layer_id: LayerId, + range: Option<(EventTime, EventTime)>, + ) -> impl Iterator + 'a { + self.ns + .as_ref() + .get(layer_id.0) + .map(|seg| 
MemAdditions::Edges(seg.deletions(self.pos))) + .into_iter() + .map(move |t_cell| { + range + .map(|(start, end)| t_cell.range(start..end)) + .unwrap_or_else(|| t_cell) + }) + } + + fn num_layers(&self) -> usize { + self.ns.as_ref().len() + } +} + +impl<'a> WithTProps<'a> for MemNodeRef<'a> { + type TProp = TPropCell<'a>; + + fn num_layers(&self) -> usize { + self.ns.as_ref().len() + } + + fn into_t_props( + self, + layer_id: LayerId, + prop_id: usize, + ) -> impl Iterator + 'a { + let node_pos = self.pos; + self.ns + .as_ref() + .get(layer_id.0) + .and_then(|layer| layer.t_prop(node_pos, prop_id)) + .into_iter() + } +} + +impl<'a> NodeRefOps<'a> for MemNodeRef<'a> { + type Additions = NodePropAdditions<'a>; + type EdgeAdditions = NodeEdgeAdditions<'a>; + type TProps = NodeTProps<'a>; + + fn node_meta(&self) -> &Arc { + self.ns.node_meta() + } + + fn vid(&self) -> VID { + self.ns.to_vid(self.pos) + } + + fn out_edges(self, layer_id: LayerId) -> impl Iterator + 'a { + self.ns.out_edges(self.pos, layer_id) + } + + fn inb_edges(self, layer_id: LayerId) -> impl Iterator + 'a { + self.ns.inb_edges(self.pos, layer_id) + } + + fn out_edges_sorted(self, layer_id: LayerId) -> impl Iterator + 'a { + self.ns.out_edges(self.pos, layer_id) + } + + fn inb_edges_sorted(self, layer_id: LayerId) -> impl Iterator + 'a { + self.ns.inb_edges(self.pos, layer_id) + } + + fn c_prop(self, layer_id: LayerId, prop_id: usize) -> Option { + self.ns + .as_ref() + .get(layer_id.0) + .and_then(|layer| layer.c_prop(self.pos, prop_id)) + } + + fn c_prop_str(self, layer_id: LayerId, prop_id: usize) -> Option<&'a str> { + self.ns + .as_ref() + .get(layer_id.0) + .and_then(|layer| layer.c_prop_str(self.pos, prop_id)) + } + + fn node_additions>>(self, layer_id: L) -> Self::Additions { + NodePropAdditions::new_with_layer(PropAdditionCellsRef::new(self), layer_id) + } + + fn edge_additions>>(self, layer_id: L) -> Self::EdgeAdditions { + NodeEdgeAdditions::new_with_layer(EdgeAdditionCellsRef::new(self), layer_id) + } + + fn degree(self, layers: &LayerIds, dir: Direction) -> usize { + match layers { + LayerIds::One(layer_id) => self.ns.degree(self.pos, *layer_id, dir), + LayerIds::All => self.ns.degree(self.pos, STATIC_GRAPH_LAYER_ID, dir), + LayerIds::None => 0, + LayerIds::Multiple(ids) => match dir { + Direction::OUT => ids + .iter() + .map(|id| self.out_nbrs_sorted(id)) + .kmerge() + .dedup() + .count(), + Direction::IN => ids + .iter() + .map(|id| self.inb_nbrs_sorted(id)) + .kmerge() + .dedup() + .count(), + Direction::BOTH => ids + .iter() + .map(|id| { + self.out_nbrs_sorted(id) + .merge(self.inb_nbrs_sorted(id)) + .dedup() + }) + .kmerge() + .dedup() + .count(), + }, + } + } + + fn find_edge(&self, dst: VID, layers: &LayerIds) -> Option { + let eid = match layers { + LayerIds::One(layer_id) => self.ns.get_out_edge(self.pos, dst, *layer_id), + LayerIds::All => self.ns.get_out_edge(self.pos, dst, STATIC_GRAPH_LAYER_ID), + LayerIds::Multiple(layers) => layers + .iter() + .find_map(|layer_id| self.ns.get_out_edge(self.pos, dst, layer_id)), + LayerIds::None => None, + }; + + let src_id = self.ns.to_vid(self.pos); + eid.map(|eid| EdgeRef::new_outgoing(eid, src_id, dst)) + } + + fn temporal_prop_layer(self, layer_id: LayerId, prop_id: usize) -> Self::TProps { + NodeTProps::new_with_layer(self, layer_id, prop_id) + } + + fn internal_num_layers(&self) -> usize { + self.ns.as_ref().len() + } + + fn has_layer_inner(self, layer_id: LayerId) -> bool { + self.ns + .as_ref() + .get(layer_id.0) + .is_some_and(|layer| 
layer.has_item(self.pos)) + } +} diff --git a/db4-storage/src/segments/node/mod.rs b/db4-storage/src/segments/node/mod.rs new file mode 100644 index 0000000000..d0b743bd85 --- /dev/null +++ b/db4-storage/src/segments/node/mod.rs @@ -0,0 +1,2 @@ +pub mod entry; +pub mod segment; diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs new file mode 100644 index 0000000000..a0354bc8a8 --- /dev/null +++ b/db4-storage/src/segments/node/segment.rs @@ -0,0 +1,756 @@ +use crate::{ + LocalPOS, + api::nodes::{LockedNSSegment, NodeSegmentOps}, + error::StorageError, + loop_lock_write, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, + segments::{ + HasRow, SegmentContainer, + node::entry::{MemNodeEntry, MemNodeRef}, + }, + wal::LSN, +}; +use either::Either; +use parking_lot::{RwLock, lock_api::ArcRwLockReadGuard}; +use raphtory_api::core::{ + Direction, + entities::{ + EID, LayerId, VID, + properties::{ + meta::Meta, + prop::{AsPropRef, Prop}, + }, + }, +}; +use raphtory_core::{ + entities::{ELID, nodes::structure::adj::Adj}, + storage::timeindex::{AsTime, EventTime}, +}; +use std::{ + ops::{Deref, DerefMut}, + path::PathBuf, + sync::{ + Arc, + atomic::{AtomicU32, AtomicUsize, Ordering}, + }, +}; + +#[derive(Debug)] +pub struct MemNodeSegment { + segment_id: usize, + max_page_len: u32, + layers: Vec>, + global_mem_tracker: Arc, + est_size: usize, + lsn: LSN, +} + +impl Drop for MemNodeSegment { + fn drop(&mut self) { + self.global_mem_tracker + .fetch_sub(self.est_size, Ordering::Relaxed); + } +} + +#[derive(Debug, Default, serde::Serialize)] +pub struct AdjEntry { + row: usize, + adj: Adj, +} + +impl AdjEntry { + pub fn degree(&self, d: Direction) -> usize { + self.adj.degree(d) + } + + pub fn edges(&self, d: Direction) -> impl Iterator + '_ { + match d { + Direction::IN => Either::Left(self.adj.inb_iter()), + Direction::OUT => Either::Right(self.adj.out_iter()), + Direction::BOTH => panic!("AdjEntry::edges: BOTH direction is not supported"), + } + } +} + +impl HasRow for AdjEntry { + fn row(&self) -> usize { + self.row + } + + fn row_mut(&mut self) -> &mut usize { + &mut self.row + } +} + +impl AsRef<[SegmentContainer]> for MemNodeSegment { + fn as_ref(&self) -> &[SegmentContainer] { + &self.layers + } +} + +impl AsMut<[SegmentContainer]> for MemNodeSegment { + fn as_mut(&mut self) -> &mut [SegmentContainer] { + &mut self.layers + } +} + +impl MemNodeSegment { + pub fn segment_id(&self) -> usize { + self.segment_id + } + + pub fn est_size(&self) -> usize { + self.est_size + } + + pub(crate) fn increment_global_est_size(&self, increment: usize) { + self.global_mem_tracker + .fetch_add(increment, Ordering::Relaxed); + } + + pub(crate) fn increment_est_size(&mut self, increment: usize) { + self.est_size += increment; + } + + pub fn swap_out_layers(&mut self) -> Vec> { + self.layers + .iter_mut() + .map(|head_guard| { + let mut old_head = SegmentContainer::new( + head_guard.segment_id(), + head_guard.max_page_len(), + head_guard.meta().clone(), + ); + std::mem::swap(&mut *head_guard, &mut old_head); + old_head + }) + .collect::>() + } + + pub fn get_or_create_layer(&mut self, layer_id: LayerId) -> &mut SegmentContainer { + let layer_id = layer_id.0; + if layer_id >= self.layers.len() { + let max_page_len = self.layers[0].max_page_len(); + let segment_id = self.layers[0].segment_id(); + let meta = self.layers[0].meta().clone(); + + self.layers.resize_with(layer_id + 1, || { + SegmentContainer::new(segment_id, max_page_len, meta.clone()) + }); + } + 
+ &mut self.layers[layer_id] + } + + pub fn node_meta(&self) -> &Arc { + self.layers[0].meta() + } + + pub fn get_layer(&self, layer_id: LayerId) -> Option<&SegmentContainer> { + self.layers.get(layer_id.0) + } + + pub fn degree(&self, n: LocalPOS, layer_id: LayerId, dir: Direction) -> usize { + self.get_adj(n, layer_id).map_or(0, |adj| adj.degree(dir)) + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + if lsn > self.lsn { + self.lsn = lsn; + } + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + let est_size = self.est_size; + self.est_size = 0; + Self { + segment_id: self.segment_id, + max_page_len: self.max_page_len, + est_size, + global_mem_tracker: self.global_mem_tracker.clone(), + layers, + lsn: self.lsn, + } + } + + pub fn to_vid(&self, pos: LocalPOS) -> VID { + pos.as_vid(self.segment_id, self.max_page_len) + } + + #[inline(always)] + fn get_adj(&self, n: LocalPOS, layer_id: LayerId) -> Option<&Adj> { + let layer_id = layer_id.0; + self.layers + .get(layer_id)? + .get(n) + .map(|AdjEntry { adj, .. }| adj) + } + + pub fn has_node(&self, n: LocalPOS, layer_id: LayerId) -> bool { + let layer_id = layer_id.0; + self.layers + .get(layer_id) + .is_some_and(|layer| layer.has_item(n)) + } + + pub fn get_out_edge(&self, n: LocalPOS, dst: VID, layer_id: LayerId) -> Option { + self.get_adj(n, layer_id) + .and_then(|adj| adj.get_edge(dst, Direction::OUT)) + } + + pub fn get_inb_edge(&self, n: LocalPOS, src: VID, layer_id: LayerId) -> Option { + self.get_adj(n, layer_id) + .and_then(|adj| adj.get_edge(src, Direction::IN)) + } + + pub fn out_edges( + &self, + n: LocalPOS, + layer_id: LayerId, + ) -> impl Iterator + '_ { + self.get_adj(n, layer_id) + .into_iter() + .flat_map(|adj| adj.out_iter()) + } + + pub fn inb_edges( + &self, + n: LocalPOS, + layer_id: LayerId, + ) -> impl Iterator + '_ { + self.get_adj(n, layer_id) + .into_iter() + .flat_map(|adj| adj.inb_iter()) + } + + pub fn new( + segment_id: usize, + max_page_len: u32, + meta: Arc, + global_mem_tracker: Arc, + ) -> Self { + Self { + segment_id, + max_page_len, + layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + global_mem_tracker, + est_size: 0, + lsn: 0, + } + } + + pub fn add_outbound_edge( + &mut self, + t: Option, + src_pos: LocalPOS, + dst: impl Into, + e_id: impl Into, + ) -> (bool, usize) { + let dst = dst.into(); + let e_id = e_id.into(); + let layer_id = e_id.layer(); + let layer = self.get_or_create_layer(layer_id); + let est_size = layer.est_size(); + + let add_out = layer.reserve_local_row(src_pos); + let new_entry = add_out.is_new(); + let add_out = add_out.inner(); + let is_new_edge = add_out.adj.add_edge_out(dst, e_id.edge); + let row = add_out.row; + if let Some(t) = t { + self.update_timestamp_inner(t, row, e_id); + } + let layer_est_size = self.layers[layer_id.0].est_size(); + let added_size = (layer_est_size - est_size) + + (is_new_edge as usize * std::mem::size_of::<(VID, VID)>()); + (new_entry, added_size) + } + + pub fn add_inbound_edge( + &mut self, + t: Option, + dst_pos: impl Into, + src: impl Into, + e_id: impl Into, + ) -> (bool, usize) { + let src = src.into(); + let e_id = e_id.into(); + let layer_id = e_id.layer(); + let dst_pos = dst_pos.into(); + + let layer = self.get_or_create_layer(layer_id); + 
let est_size = layer.est_size(); + + let add_in = layer.reserve_local_row(dst_pos); + let new_entry = add_in.is_new(); + let add_in = add_in.inner(); + let is_new_edge = add_in.adj.add_edge_into(src, e_id.edge); + let row = add_in.row; + + if let Some(t) = t { + self.update_timestamp_inner(t, row, e_id); + } + let layer_est_size = self.layers[layer_id.0].est_size(); + let added_size = (layer_est_size - est_size) + + (is_new_edge as usize * std::mem::size_of::<(VID, VID)>()); + (new_entry, added_size) + } + + fn update_timestamp_inner(&mut self, t: T, row: usize, e_id: ELID) { + let mut prop_mut_entry = self.layers[e_id.layer().0] + .properties_mut() + .get_mut_entry(row); + let ts = EventTime::new(t.t(), t.i()); + + prop_mut_entry.addition_timestamp(ts, e_id); + } + + pub fn update_timestamp(&mut self, t: T, node_pos: LocalPOS, e_id: ELID) -> usize { + let layer_id = e_id.layer(); + let (est_size, row) = { + let segment_container = self.get_or_create_layer(layer_id); //&mut self.layers[e_id.layer()]; + let est_size = segment_container.est_size(); + let row = segment_container.reserve_local_row(node_pos).inner().row(); + (est_size, row) + }; + self.update_timestamp_inner(t, row, e_id); + let layer_est_size = self.layers[layer_id.0].est_size(); + layer_est_size - est_size + } + + pub fn add_props( + &mut self, + t: T, + node_pos: LocalPOS, + layer_id: LayerId, + props: impl IntoIterator, + ) -> (bool, usize) { + let layer = self.get_or_create_layer(layer_id); + let est_size = layer.est_size(); + let row = layer.reserve_local_row(node_pos); + let is_new = row.is_new(); + let row = row.inner().row; + let mut prop_mut_entry = layer.properties_mut().get_mut_entry(row); + let ts = EventTime::new(t.t(), t.i()); + prop_mut_entry.append_t_props(ts, props); + let layer_est_size = layer.est_size(); + (is_new, layer_est_size - est_size) + } + + pub fn check_metadata( + &self, + node_pos: LocalPOS, + layer_id: LayerId, + props: &[(usize, P)], + ) -> Result<(), StorageError> { + if let Some(layer) = self.layers.get(layer_id.0) { + layer.check_metadata(node_pos, props)?; + } + Ok(()) + } + + pub fn update_metadata( + &mut self, + node_pos: LocalPOS, + layer_id: LayerId, + props: impl IntoIterator, + ) -> (bool, usize) { + let segment_container = self.get_or_create_layer(layer_id); + let est_size = segment_container.est_size(); + + let row = segment_container.reserve_local_row(node_pos).map(|a| a.row); + let is_new = row.is_new(); + let row = row.inner(); + let mut prop_mut_entry = segment_container.properties_mut().get_mut_entry(row); + prop_mut_entry.append_const_props(props); + + let layer_est_size = segment_container.est_size(); + let added_size = (layer_est_size - est_size) + 8; // random estimate for constant properties + (is_new, added_size) + } + + pub fn get_metadata( + &self, + node_pos: LocalPOS, + layer_id: LayerId, + prop_id: usize, + ) -> Option { + let segment_container = &self.layers[layer_id.0]; + segment_container.c_prop(node_pos, prop_id) + } + + pub fn latest(&self) -> Option { + Iterator::max(self.layers.iter().filter_map(|seg| seg.latest())) + } + + pub fn earliest(&self) -> Option { + Iterator::min(self.layers.iter().filter_map(|seg| seg.earliest())) + } + + pub fn t_len(&self) -> usize { + self.layers.iter().map(|seg| seg.t_len()).sum() + } + + pub fn node_ref(&self, pos: LocalPOS) -> MemNodeRef<'_> { + MemNodeRef::new(pos, self) + } + + pub fn max_page_len(&self) -> u32 { + self.max_page_len + } +} + +#[derive(Debug)] +pub struct NodeSegmentView { + inner: Arc>, + segment_id: 
usize, + max_num_node: AtomicU32, + _ext: EXT, +} + +#[derive(Debug)] +pub struct ArcLockedSegmentView { + inner: ArcRwLockReadGuard, + num_nodes: u32, +} + +impl ArcLockedSegmentView { + pub fn new( + inner: ArcRwLockReadGuard, + num_nodes: u32, + ) -> Self { + Self { inner, num_nodes } + } +} + +impl LockedNSSegment for ArcLockedSegmentView { + type EntryRef<'a> = MemNodeRef<'a>; + + fn num_nodes(&self) -> u32 { + self.num_nodes + } + + fn entry_ref<'a>(&'a self, pos: impl Into) -> Self::EntryRef<'a> { + let pos = pos.into(); + MemNodeRef::new(pos, &self.inner) + } +} + +impl>> NodeSegmentOps for NodeSegmentView

{ + type Extension = P; + + type Entry<'a> = MemNodeEntry<'a, parking_lot::RwLockReadGuard<'a, MemNodeSegment>>; + + type ArcLockedSegment = ArcLockedSegmentView; + + fn latest(&self) -> Option { + self.head().latest() + } + + fn earliest(&self) -> Option { + self.head().earliest() + } + + fn t_len(&self) -> usize { + self.head().t_len() + } + + fn load( + _segment_id: usize, + _node_meta: Arc, + _edge_meta: Arc, + _path: impl AsRef, + _ext: Self::Extension, + ) -> Result + where + Self: Sized, + { + Err(StorageError::GenericFailure( + "load not supported".to_string(), + )) + } + + fn new( + segment_id: usize, + meta: Arc, + _edge_meta: Arc, + _path: Option, + ext: Self::Extension, + ) -> Self { + let max_page_len = ext.config().max_node_page_len(); + let inner = RwLock::new(MemNodeSegment::new( + segment_id, + max_page_len, + meta, + ext.memory_tracker().clone(), + )); + let inner = Arc::new(inner); + + Self { + inner, + segment_id, + _ext: ext, + max_num_node: AtomicU32::new(0), + } + } + + fn segment_id(&self) -> usize { + self.segment_id + } + + fn is_dirty(&self) -> bool { + true + } + + #[inline(always)] + fn head_arc(&self) -> ArcRwLockReadGuard { + self.inner.read_arc_recursive() + } + + #[inline(always)] + fn head(&self) -> parking_lot::RwLockReadGuard<'_, MemNodeSegment> { + self.inner.read_recursive() + } + + #[inline(always)] + fn head_mut(&self) -> parking_lot::RwLockWriteGuard<'_, MemNodeSegment> { + loop_lock_write(&self.inner) + } + + #[inline(always)] + fn try_head_mut(&self) -> Option> { + self.inner.try_write() + } + + fn notify_write( + &self, + _head_lock: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } + + fn set_dirty(&self, _dirty: bool) {} + + fn has_node(&self, _pos: LocalPOS, _layer_id: LayerId) -> bool { + false + } + + fn get_out_edge( + &self, + pos: LocalPOS, + dst: impl Into, + layer_id: LayerId, + locked_head: impl Deref, + ) -> Option { + locked_head.get_out_edge(pos, dst.into(), layer_id) + } + + fn get_inb_edge( + &self, + pos: LocalPOS, + src: impl Into, + layer_id: LayerId, + locked_head: impl Deref, + ) -> Option { + locked_head.get_inb_edge(pos, src.into(), layer_id) + } + + fn entry<'a>(&'a self, pos: impl Into) -> Self::Entry<'a> { + let pos = pos.into(); + MemNodeEntry::new(pos, self.head()) + } + + fn locked(&self) -> Self::ArcLockedSegment { + ArcLockedSegmentView::new(self.inner.read_arc(), self.num_nodes()) + } + + fn flush(&self) -> Result<(), StorageError> { + Ok(()) + } + + fn vacuum( + &self, + _locked_head: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } + + fn immut_lsn(&self) -> LSN { + 0 + } + + fn nodes_counter(&self) -> &AtomicU32 { + &self.max_num_node + } + + fn num_layers(&self) -> usize { + self.head().layers.len() + } + + fn layer_count(&self, layer_id: LayerId) -> u32 { + self.head() + .get_layer(layer_id) + .map_or(0, |layer| layer.len()) + } +} + +#[cfg(test)] +mod test { + use crate::{ + LocalPOS, NodeSegmentView, + api::nodes::NodeSegmentOps, + pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, + persist::{ + config::BaseConfig, + strategy::{NoOpStrategy, PersistenceStrategy}, + }, + }; + use raphtory_api::core::entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::{Prop, PropType}, + }; + use raphtory_core::entities::{EID, ELID, VID}; + use std::sync::Arc; + use tempfile::tempdir; + + #[test] + fn est_size_changes() { + let node_meta = Arc::new(Meta::default()); + let edge_meta = Arc::new(Meta::default()); + let path = tempdir().unwrap(); + let config =
BaseConfig::new(10, 10); + let ext = NoOpStrategy::new(config, None).unwrap(); + let segment_id = 0; + let segment = NodeSegmentView::new( + segment_id, + node_meta.clone(), + edge_meta, + Some(path.path().to_path_buf()), + ext.clone(), + ); + let stats = GraphStats::default(); + + let mut writer = NodeWriter::new(&segment, &stats, segment.head_mut()); + + let est_size1 = writer.mut_segment.est_size(); + assert_eq!(est_size1, 0); + + writer.add_outbound_edge( + Some(1), + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); + + let est_size2 = writer.mut_segment.est_size(); + assert!( + est_size2 > est_size1, + "Estimated size should be greater than 0 after adding an edge" + ); + + writer.add_inbound_edge( + Some(1), + LocalPOS(2), + VID(4), + EID(8).with_layer(STATIC_GRAPH_LAYER_ID), + ); + + let est_size3 = writer.mut_segment.est_size(); + assert!( + est_size3 > est_size2, + "Estimated size should increase after adding an inbound edge" + ); + + // no change when adding the same edge again + + writer.add_outbound_edge::( + None, + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); + let est_size4 = writer.mut_segment.est_size(); + assert_eq!( + est_size4, est_size3, + "Estimated size should not change when adding the same edge again" + ); + + // add constant properties + + let prop_id = node_meta + .metadata_mapper() + .get_or_create_and_validate("a", PropType::U64) + .unwrap() + .inner(); + + writer.update_c_props( + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U64(73))], + ); + + let est_size5 = writer.mut_segment.est_size(); + assert!( + est_size5 > est_size4, + "Estimated size should increase after adding constant properties" + ); + + writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), STATIC_GRAPH_LAYER_ID)); + + let est_size6 = writer.mut_segment.est_size(); + assert!( + est_size6 > est_size5, + "Estimated size should increase after updating timestamp" + ); + + // add temporal properties + let prop_id = node_meta + .temporal_prop_mapper() + .get_or_create_and_validate("b", PropType::F64) + .unwrap() + .inner(); + + writer.add_props( + 42, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(4.13))], + ); + + let est_size7 = writer.mut_segment.est_size(); + assert!( + est_size7 > est_size6, + "Estimated size should increase after adding temporal properties" + ); + + writer.add_props( + 72, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(5.41))], + ); + let est_size8 = writer.mut_segment.est_size(); + assert!( + est_size8 > est_size7, + "Estimated size should increase after adding another temporal property" + ); + drop(writer); + + // after drop the global estimated size should be the same as the last estimated size of the writer + assert_eq!(ext.estimated_size(), est_size8); + + drop(segment); + + // after the segment is dropped, the global estimated size should be zero (no more usage) + assert_eq!(ext.estimated_size(), 0); + } +} diff --git a/db4-storage/src/state.rs b/db4-storage/src/state.rs new file mode 100644 index 0000000000..b12520b2d2 --- /dev/null +++ b/db4-storage/src/state.rs @@ -0,0 +1,892 @@ +use crate::pages::SegmentCounts; +use rayon::prelude::*; +use std::{ + borrow::Borrow, + marker::PhantomData, + ops::{Index, IndexMut}, + sync::Arc, +}; + +/// Index resolver for sharded storage with fixed-size chunks +/// +/// Given a sharding scheme where items are distributed across chunks: +/// - chunk_id = index / max_page_len +/// - local_pos = index % max_page_len +/// +/// This struct 
provides O(1) lookup to map any global index to a flat array position, +/// accounting for partially filled chunks. +/// +/// # Example +/// With max_page_len = 1000: +/// - Chunk 0: 1000 items (offsets[0] = 0, offsets[1] = 1000) +/// - Chunk 1: 500 items (offsets[1] = 1000, offsets[2] = 1500) +/// - Chunk 2: 1000 items (offsets[2] = 1500, offsets[3] = 2500) +/// +/// To resolve index 1200: +/// - chunk = 1200 / 1000 = 1 +/// - local_pos = 1200 % 1000 = 200 +/// - flat_index = offsets[1] + 200 = 1000 + 200 = 1200 +#[derive(Debug, Clone)] +pub struct StateIndex { + /// Cumulative offsets: offsets[chunk_id] = starting position in flat array for that chunk + /// Length is equal to number of chunks + 1 (includes final cumulative value) + offsets: Box<[usize]>, + /// Maximum items per chunk + max_page_len: u32, + /// Phantom data for index type + _marker: std::marker::PhantomData, +} + +impl From> for StateIndex +where + I: From + Into, +{ + fn from(counts: SegmentCounts) -> Self { + Self::new( + counts.counts().iter().map(|c| *c as usize), + counts.max_seg_len(), + ) + } +} + +impl + Into> StateIndex { + /// Create a new StateIndex with the given chunk configuration + /// + /// # Arguments + /// * `chunk_sizes` - The actual size of each chunk (can be <= max_page_len) + /// * `max_page_len` - Maximum capacity of each chunk + pub fn new(chunk_sizes: impl IntoIterator, max_page_len: u32) -> Self { + // Build cumulative offsets (includes final cumulative value) + let mut offsets = Vec::new(); + let mut cumulative = 0; + for size in chunk_sizes { + offsets.push(cumulative); + cumulative += size; + } + offsets.push(cumulative); // Add final cumulative value + + Self { + offsets: offsets.into_boxed_slice(), + max_page_len, + _marker: std::marker::PhantomData, + } + } + + /// Create a new StateIndex for mapping the union of both indexes + pub fn union(&self, other: &Self) -> Self { + let counts = self + .offsets + .iter() + .zip(other.offsets.iter()) + .map(|(l, r)| *l.max(r)); + let max_page_len = self.max_page_len.max(other.max_page_len); + Self::new(counts, max_page_len) + } + + /// Resolve a global index to a flat array index + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// Some(flat_index) if the index is valid, None otherwise + #[inline(always)] + pub fn resolve(&self, index: I) -> Option { + let index: usize = index.into(); + let chunk = index / self.max_page_len as usize; + let local_pos = index % self.max_page_len as usize; + + let offset = *self.offsets.get(chunk)?; + let flat_index = offset + local_pos; + + // Verify the flat_index is within bounds of this chunk + let next_offset = *self.offsets.get(chunk + 1)?; + if flat_index < next_offset { + Some(flat_index) + } else { + None + } + } + + pub fn global_index(&self, index: usize) -> Option { + self.iter().nth(index) + } + + /// Resolve a global index to a flat array index without bounds checking + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// The flat array index + /// + /// # Safety + /// Panics if the index is out of bounds + #[inline(always)] + pub fn resolve_unchecked(&self, index: I) -> usize { + let index: usize = index.into(); + let chunk = index / self.max_page_len as usize; + let local_pos = index % self.max_page_len as usize; + + let offset = self.offsets[chunk]; + offset + local_pos + } + + /// Get the number of chunks + #[inline] + pub fn num_chunks(&self) -> usize { + self.offsets.len().saturating_sub(1) + } + + /// Get 
the total number of items across all chunks + #[inline] + pub fn len(&self) -> usize { + self.offsets[self.num_chunks()] + } + + /// Check if there are no items + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get the maximum page length + #[inline] + pub fn max_page_len(&self) -> u32 { + self.max_page_len + } + + /// Create an iterator over all valid global indices + /// + /// This iterates through all chunks and yields the global indices for each item. + /// For example, with chunk_sizes [10, 1, 5] and max_page_len 10: + /// - Chunk 0: yields 0..10 + /// - Chunk 1: yields 10..11 + /// - Chunk 2: yields 20..25 + pub fn iter(&self) -> StateIndexIter<&Self, I> { + StateIndexIter { + index: self, + current_chunk: 0, + current_local: 0, + _marker: PhantomData, + } + } + + /// Create a parallel iterator over all valid global indices with their flat indices + /// + /// This iterates through all chunks in parallel and yields tuples of (flat_index, global_index). + /// The flat_index starts at 0 and increments for each item in iteration order. + /// + /// For example, with chunk_sizes [10, 1, 5] and max_page_len 10: + /// - Chunk 0: yields (0, 0)..(9, 9) + /// - Chunk 1: yields (10, 10) + /// - Chunk 2: yields (11, 20)..(15, 24) + pub fn par_iter(&self) -> impl ParallelIterator + '_ + where + I: Send + Sync, + { + let max_page_len = self.max_page_len as usize; + let num_chunks = self.num_chunks(); + (0..num_chunks).into_par_iter().flat_map(move |chunk_idx| { + let chunk_start = self.offsets[chunk_idx]; + let chunk_end = self.offsets[chunk_idx + 1]; + let chunk_size = chunk_end - chunk_start; + let global_base = chunk_idx * max_page_len; + (0..chunk_size).into_par_iter().map(move |local_offset| { + let flat_idx = chunk_start + local_offset; + let global_idx = I::from(global_base + local_offset); + (flat_idx, global_idx) + }) + }) + } + + pub fn arc_into_iter(self: Arc) -> StateIndexIter, I> { + StateIndexIter { + index: self, + current_chunk: 0, + current_local: 0, + _marker: PhantomData, + } + } +} + +impl + Into> StateIndex { + /// Create a parallel iterator over all valid global indices with their flat indices + /// + /// This iterates through all chunks in parallel and yields tuples of (flat_index, global_index). + /// The flat_index starts at 0 and increments for each item in iteration order. 
+ /// + /// For example, with chunk_sizes [10, 1, 5] and max_page_len 10: + /// - Chunk 0: yields (0, 0)..(9, 9) + /// - Chunk 1: yields (10, 10) + /// - Chunk 2: yields (11, 20)..(15, 24) + pub fn into_par_iter(self: Arc) -> impl ParallelIterator + where + I: Send + Sync, + { + let max_page_len = self.max_page_len as usize; + let num_chunks = self.num_chunks(); + (0..num_chunks).into_par_iter().flat_map(move |chunk_idx| { + let chunk_start = self.offsets[chunk_idx]; + let chunk_end = self.offsets[chunk_idx + 1]; + let chunk_size = chunk_end - chunk_start; + let global_base = chunk_idx * max_page_len; + (0..chunk_size).into_par_iter().map(move |local_offset| { + let flat_idx = chunk_start + local_offset; + let global_idx = I::from(global_base + local_offset); + (flat_idx, global_idx) + }) + }) + } +} + +/// Iterator over global indices in a StateIndex +#[derive(Debug, Clone)] +pub struct StateIndexIter { + index: I, + current_chunk: usize, + current_local: usize, + _marker: PhantomData, +} + +impl + Into, I: Borrow>> Iterator for StateIndexIter { + type Item = V; + + fn next(&mut self) -> Option { + let index = self.index.borrow(); + loop { + if self.current_chunk >= index.num_chunks() { + return None; + } + + let chunk_start = index.offsets[self.current_chunk]; + let chunk_end = index.offsets[self.current_chunk + 1]; + let chunk_size = chunk_end - chunk_start; + + if self.current_local < chunk_size { + let global_idx = + self.current_chunk * index.max_page_len as usize + self.current_local; + self.current_local += 1; + return Some(V::from(global_idx)); + } + + // Move to next chunk + self.current_chunk += 1; + self.current_local = 0; + } + } + + fn nth(&mut self, n: usize) -> Option { + let index = self.index.borrow(); + // fast skip + if self.current_chunk >= index.num_chunks() { + return None; + } + let current = index.offsets[self.current_chunk] + self.current_local; + let target = current.saturating_add(n); + if &target >= index.offsets.last()? { + // need to set this so that future calls actually return None! + self.current_chunk = index.num_chunks(); + return None; + } + // find the first offset > target, then subtract 1 to get the last chunk starting at <= target + let skip_chunks = index.offsets[self.current_chunk..] + .partition_point(|&offset| offset <= target) + .saturating_sub(1); + self.current_chunk += skip_chunks; + self.current_local = target - index.offsets[self.current_chunk]; + let global_idx = self.current_chunk * index.max_page_len as usize + self.current_local; + self.current_local += 1; + Some(V::from(global_idx)) + } + + fn size_hint(&self) -> (usize, Option) { + let index = self.index.borrow(); + let total = index.len(); + let consumed = if self.current_chunk < index.num_chunks() { + index.offsets[self.current_chunk] + self.current_local + } else { + total + }; + let remaining = total.saturating_sub(consumed); + (remaining, Some(remaining)) + } +} + +impl + Into, I: Borrow>> ExactSizeIterator + for StateIndexIter +{ +} + +impl + Into> IntoIterator for StateIndex { + type Item = V; + type IntoIter = StateIndexIter; + + fn into_iter(self) -> Self::IntoIter { + StateIndexIter { + index: self, + current_chunk: 0, + current_local: 0, + _marker: PhantomData, + } + } +} + +/// Address resolver for sharded storage with fixed-size chunks +/// +/// This struct combines a StateIndex with a flat array to provide O(1) access +/// to elements in a sharded storage scheme with partially filled chunks.
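Before the `State` wrapper that follows, it may help to see the offset arithmetic in isolation. The sketch below is a minimal, self-contained distillation of the resolution scheme described in the `StateIndex` docs above, not the crate's code: the names are illustrative and the hard-coded `OFFSETS` mirror the doc example (chunks of 1000, 500 and 1000 items, each with capacity 1000).

```rust
// Toy version of the chunk/offset resolution; OFFSETS holds each chunk's
// cumulative start plus a final total, exactly as the docs describe.
const MAX_PAGE_LEN: usize = 1000;
const OFFSETS: [usize; 4] = [0, 1000, 1500, 2500];

/// Map a global index to its position in the flat backing array,
/// returning None if it falls in the unfilled tail of a chunk.
fn resolve(index: usize) -> Option<usize> {
    let chunk = index / MAX_PAGE_LEN;
    let local = index % MAX_PAGE_LEN;
    let flat = OFFSETS.get(chunk)? + local;
    // The next offset bounds this chunk's filled region.
    (flat < *OFFSETS.get(chunk + 1)?).then_some(flat)
}

fn main() {
    assert_eq!(resolve(1200), Some(1200)); // chunk 1, local 200
    assert_eq!(resolve(2000), Some(1500)); // chunk 2 starts at flat 1500
    assert_eq!(resolve(1600), None);       // chunk 1 only holds 500 items
    assert_eq!(resolve(9999), None);       // past the last chunk
}
```

The extra trailing offset is what lets the bounds check for a partially filled chunk reduce to a single comparison against the next chunk's start.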
+#[derive(Debug)] +pub struct State { + /// Index resolver + index: StateIndex, + /// Flat array of state cells + state: Box<[A]>, +} + +impl + Into> State { + /// Create a new State with the given chunk configuration + /// + /// # Arguments + /// * `chunk_sizes` - The actual size of each chunk (can be <= max_page_len) + /// * `max_page_len` - Maximum capacity of each chunk + /// + /// # Example + /// ``` + /// use db4_storage::state::State; + /// use std::sync::atomic::AtomicUsize; + /// + /// // 3 chunks with sizes 1000, 500, 1000 and max capacity 1000 + /// let state: State = State::new(vec![1000, 500, 1000], 1000); + /// ``` + pub fn new(chunk_sizes: Vec, max_page_len: u32) -> Self { + let index = StateIndex::::new(chunk_sizes, max_page_len); + let total_size = index.len(); + + // Initialize state array with default values + let state: Box<[A]> = (0..total_size) + .map(|_| A::default()) + .collect::>() + .into_boxed_slice(); + + Self { index, state } + } + + /// Get a reference to the StateIndex + #[inline] + pub fn index(&self) -> &StateIndex { + &self.index + } + + /// Get a reference to the cell for the given global index + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// Some(&A) if the index is valid, None otherwise + #[inline(always)] + pub fn get(&self, index: I) -> Option<&A> { + let flat_index = self.index.resolve(index)?; + self.state.get(flat_index) + } + + /// Get a mutable reference to the cell for the given global index + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// Some(&mut A) if the index is valid, None otherwise + #[inline(always)] + pub fn get_mut(&mut self, index: I) -> Option<&mut A> { + let flat_index = self.index.resolve(index)?; + self.state.get_mut(flat_index) + } + + /// Get a reference to the cell for the given global index without bounds checking + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// Reference to the corresponding cell + /// + /// # Safety + /// Panics if the index is out of bounds + #[inline(always)] + pub fn get_unchecked(&self, index: I) -> &A { + let flat_index = self.index.resolve_unchecked(index); + &self.state[flat_index] + } + + /// Get a mutable reference to the cell for the given global index without bounds checking + /// + /// # Arguments + /// * `index` - Global index across all chunks + /// + /// # Returns + /// Mutable reference to the corresponding cell + /// + /// # Safety + /// Panics if the index is out of bounds + #[inline(always)] + pub fn get_mut_unchecked(&mut self, index: I) -> &mut A { + let flat_index = self.index.resolve_unchecked(index); + &mut self.state[flat_index] + } + + /// Get the number of chunks + #[inline] + pub fn num_chunks(&self) -> usize { + self.index.num_chunks() + } + + /// Get the total number of state cells + #[inline] + pub fn len(&self) -> usize { + self.state.len() + } + + /// Check if the state is empty + #[inline] + pub fn is_empty(&self) -> bool { + self.state.is_empty() + } + + /// Get the maximum page length + #[inline] + pub fn max_page_len(&self) -> u32 { + self.index.max_page_len() + } + + /// Create an iterator over all elements in the state + /// + /// Yields references to each element in order of their global indices. 
+ pub fn iter(&self) -> StateIter<'_, A, I> { + StateIter { + state: self, + inner: self.index.iter(), + } + } +} + +/// Iterator over elements in a State +#[derive(Debug)] +pub struct StateIter<'a, A, I> { + state: &'a State, + inner: StateIndexIter<&'a StateIndex, I>, +} + +impl<'a, A: Default, I: From + Into> Iterator for StateIter<'a, A, I> { + type Item = &'a A; + + fn next(&mut self) -> Option { + let global_idx = self.inner.next()?; + Some(self.state.get_unchecked(global_idx)) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl<'a, A: Default, I: From + Into> ExactSizeIterator for StateIter<'a, A, I> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl + Into + std::fmt::Debug + Copy> Index for State { + type Output = A; + + #[inline(always)] + fn index(&self, index: I) -> &Self::Output { + self.get(index) + .unwrap_or_else(|| panic!("index out of bounds: {:?}", index)) + } +} + +impl + Into + std::fmt::Debug + Copy> IndexMut + for State +{ + #[inline(always)] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + self.get_mut(index) + .unwrap_or_else(|| panic!("index out of bounds: {:?}", index)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicUsize, Ordering}; + + #[test] + fn test_state_index_resolve() { + let index: StateIndex = StateIndex::new(vec![1000, 500, 1000], 1000); + + assert_eq!(index.num_chunks(), 3); + assert_eq!(index.len(), 2500); + assert_eq!(index.max_page_len(), 1000); + + // Test chunk 0 + assert_eq!(index.resolve(0), Some(0)); + assert_eq!(index.resolve(999), Some(999)); + + // Test chunk 1 + assert_eq!(index.resolve(1000), Some(1000)); + assert_eq!(index.resolve(1499), Some(1499)); + + // Test chunk 2 + assert_eq!(index.resolve(2000), Some(1500)); + assert_eq!(index.resolve(2999), Some(2499)); + + // Test out of bounds + assert_eq!(index.resolve(3000), None); + assert_eq!(index.resolve(1500), None); // In chunk 1 but beyond its actual size + } + + #[test] + fn test_basic_get() { + let state: State = State::new(vec![1000, 500, 1000], 1000); + + // Test chunk 0 + state.get_unchecked(0).store(42, Ordering::Relaxed); + assert_eq!(state.get_unchecked(0).load(Ordering::Relaxed), 42); + + state.get_unchecked(999).store(123, Ordering::Relaxed); + assert_eq!(state.get_unchecked(999).load(Ordering::Relaxed), 123); + + // Test chunk 1 (offset should be 1000) + state.get_unchecked(1000).store(77, Ordering::Relaxed); + assert_eq!(state.get_unchecked(1000).load(Ordering::Relaxed), 77); + + state.get_unchecked(1499).store(88, Ordering::Relaxed); + assert_eq!(state.get_unchecked(1499).load(Ordering::Relaxed), 88); + + // Test chunk 2 (offset should be 1500) + state.get_unchecked(2000).store(99, Ordering::Relaxed); + assert_eq!(state.get_unchecked(2000).load(Ordering::Relaxed), 99); + + state.get_unchecked(2999).store(111, Ordering::Relaxed); + assert_eq!(state.get_unchecked(2999).load(Ordering::Relaxed), 111); + } + + #[test] + fn test_get_option() { + let state: State = State::new(vec![100, 50], 100); + + assert!(state.get(0).is_some()); + assert!(state.get(99).is_some()); + assert!(state.get(100).is_some()); + assert!(state.get(149).is_some()); + + // Out of bounds chunk + assert!(state.get(200).is_none()); + assert!(state.get(1000).is_none()); + + // In bounds chunk but beyond chunk's actual size + assert!(state.get(150).is_none()); + } + + #[test] + #[should_panic] + fn test_out_of_bounds_chunk() { + let state: State = State::new(vec![100], 100); + state.get_unchecked(200); // Should 
panic + } + + #[test] + fn test_partially_filled_chunks() { + // Simulate real scenario: chunks with varying fill levels + let state: State = State::new(vec![1000, 300, 1000, 50], 1000); + + // First chunk - fully filled + state.get_unchecked(0).store(1, Ordering::Relaxed); + state.get_unchecked(999).store(2, Ordering::Relaxed); + assert_eq!(state.get_unchecked(0).load(Ordering::Relaxed), 1); + assert_eq!(state.get_unchecked(999).load(Ordering::Relaxed), 2); + + // Second chunk - partially filled (300 items) + // Global indices: 1000-1299 + state.get_unchecked(1000).store(3, Ordering::Relaxed); + state.get_unchecked(1299).store(4, Ordering::Relaxed); + assert_eq!(state.get_unchecked(1000).load(Ordering::Relaxed), 3); + assert_eq!(state.get_unchecked(1299).load(Ordering::Relaxed), 4); + + // Third chunk - fully filled + // Global indices: 2000-2999 + state.get_unchecked(2000).store(5, Ordering::Relaxed); + state.get_unchecked(2999).store(6, Ordering::Relaxed); + assert_eq!(state.get_unchecked(2000).load(Ordering::Relaxed), 5); + assert_eq!(state.get_unchecked(2999).load(Ordering::Relaxed), 6); + + // Fourth chunk - minimally filled (50 items) + // Global indices: 3000-3049 + state.get_unchecked(3000).store(7, Ordering::Relaxed); + state.get_unchecked(3049).store(8, Ordering::Relaxed); + assert_eq!(state.get_unchecked(3000).load(Ordering::Relaxed), 7); + assert_eq!(state.get_unchecked(3049).load(Ordering::Relaxed), 8); + + assert_eq!(state.len(), 2350); // 1000 + 300 + 1000 + 50 + assert_eq!(state.num_chunks(), 4); + } + + #[test] + fn test_resolve_pos_consistency() { + // Test that our addressing matches the resolve_pos function + let max_page_len = 1000u32; + let state: State = State::new(vec![1000, 500, 1000], max_page_len); + + // Helper to simulate resolve_pos + let resolve_pos = |i: usize| -> (usize, u32) { + let chunk = i / max_page_len as usize; + let pos = (i % max_page_len as usize) as u32; + (chunk, pos) + }; + + for index in [0, 500, 999, 1000, 1250, 1499, 2000, 2500, 2999] { + let (chunk, local_pos) = resolve_pos(index); + + // Verify our addressing scheme matches + let computed_chunk = index / max_page_len as usize; + let computed_local = index % max_page_len as usize; + + assert_eq!(chunk, computed_chunk); + assert_eq!(local_pos, computed_local as u32); + + // Verify we can access the cell + state.get_unchecked(index).store(index, Ordering::Relaxed); + assert_eq!(state.get_unchecked(index).load(Ordering::Relaxed), index); + } + } + + #[test] + fn test_generic_over_different_types() { + // Test with usize + let state_usize: State = State::new(vec![10, 5], 10); + assert_eq!(*state_usize.get_unchecked(0), 0); + assert_eq!(*state_usize.get_unchecked(10), 0); + + // Test with Option + let state_option: State> = State::new(vec![10, 5], 10); + assert_eq!(*state_option.get_unchecked(0), None); + assert_eq!(*state_option.get_unchecked(10), None); + + // Test with AtomicUsize + let state_atomic: State = State::new(vec![10, 5], 10); + state_atomic.get_unchecked(0).store(42, Ordering::Relaxed); + assert_eq!(state_atomic.get_unchecked(0).load(Ordering::Relaxed), 42); + } + + #[test] + fn test_mutable_access() { + let mut state: State = State::new(vec![100, 50], 100); + + // Test get_mut + *state.get_mut(0).unwrap() = 42; + assert_eq!(*state.get(0).unwrap(), 42); + + *state.get_mut(50).unwrap() = 99; + assert_eq!(*state.get(50).unwrap(), 99); + + // Test get_mut in second chunk + *state.get_mut(100).unwrap() = 123; + assert_eq!(*state.get(100).unwrap(), 123); + + // Test get_mut_unchecked + 
*state.get_mut_unchecked(10) = 77; + assert_eq!(*state.get_unchecked(10), 77); + + // Test out of bounds returns None + assert!(state.get_mut(200).is_none()); + } + + #[test] + fn test_index_trait() { + let mut state: State = State::new(vec![100, 50], 100); + + // Test Index trait + state[0] = 42; + assert_eq!(state[0], 42); + + state[99] = 100; + assert_eq!(state[99], 100); + + // Test in second chunk + state[100] = 200; + assert_eq!(state[100], 200); + + state[149] = 300; + assert_eq!(state[149], 300); + } + + #[test] + #[should_panic(expected = "index out of bounds")] + fn test_index_out_of_bounds() { + let state: State = State::new(vec![100], 100); + let _ = state[200]; + } + + #[test] + fn test_offsets_include_final_cumulative() { + let state: State = State::new(vec![1000, 500, 1000], 1000); + + // offsets should be [0, 1000, 1500, 2500] + assert_eq!(state.num_chunks(), 3); + assert_eq!(state.len(), 2500); + + // Verify via StateIndex API + assert_eq!(state.index().len(), state.len()); + } + + #[test] + fn test_state_index_can_be_used_independently() { + // StateIndex can be used independently of State + let index: StateIndex = StateIndex::new(vec![1000, 500, 1000], 1000); + + // Create your own array + let mut data = vec![0usize; index.len()]; + + // Use the index to access elements + if let Some(flat_idx) = index.resolve(1200) { + data[flat_idx] = 42; + } + + if let Some(flat_idx) = index.resolve(1200) { + assert_eq!(data[flat_idx], 42); + } + } + + #[test] + fn test_state_index_iter() { + let index: StateIndex = StateIndex::new(vec![10, 1, 5], 10); + + let global_indices: Vec = index.iter().collect(); + + // Chunk 0: global indices 0-9 (10 items) + // Chunk 1: global index 10 (1 item) + // Chunk 2: global indices 20-24 (5 items) + let expected = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // Chunk 0 + 10, // Chunk 1 + 20, 21, 22, 23, 24, // Chunk 2 + ]; + + assert_eq!(global_indices, expected); + assert_eq!(index.iter().len(), 16); + } + + #[test] + fn test_state_index_par_iter() { + let index: StateIndex = StateIndex::new(vec![10, 1, 5], 10); + + let mut results: Vec<(usize, usize)> = index.par_iter().collect(); + results.sort_by_key(|(flat_idx, _)| *flat_idx); // Sort by flat index + + // Expected: (flat_idx, global_idx) tuples + // Chunk 0: flat indices 0-9, global indices 0-9 + // Chunk 1: flat index 10, global index 10 + // Chunk 2: flat indices 11-15, global indices 20-24 + let expected = vec![ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), // Chunk 0 + (10, 10), // Chunk 1 + (11, 20), + (12, 21), + (13, 22), + (14, 23), + (15, 24), // Chunk 2 + ]; + + assert_eq!(results, expected); + + // Verify count matches + assert_eq!(index.par_iter().count(), 16); + + // Verify flat indices are sequential + let flat_indices: Vec = results.iter().map(|(flat_idx, _)| *flat_idx).collect(); + assert_eq!(flat_indices, (0..16).collect::>()); + } + + #[test] + fn test_state_iter() { + let mut state: State = State::new(vec![10, 1, 5], 10); + + // Collect global indices first to avoid borrow checker issues + let global_indices: Vec = state.index().iter().collect(); + + // Initialize state with global indices + for global_idx in global_indices { + state[global_idx] = global_idx * 10; + } + + // Collect values via iter + let values: Vec = state.iter().copied().collect(); + + let expected = vec![ + 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, // Chunk 0 + 100, // Chunk 1 + 200, 210, 220, 230, 240, // Chunk 2 + ]; + + assert_eq!(values, expected); + 
assert_eq!(state.iter().len(), 16); + } + + #[test] + fn test_state_iter_with_atomics() { + let state: State = State::new(vec![10, 5], 10); + + // Collect global indices first to avoid borrow checker issues + let global_indices: Vec = state.index().iter().collect(); + + // Set values via global indices + for global_idx in global_indices { + state + .get_unchecked(global_idx) + .store(global_idx, Ordering::Relaxed); + } + + // Read via iterator + let values: Vec = state.iter().map(|a| a.load(Ordering::Relaxed)).collect(); + + let expected = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // Chunk 0 + 10, 11, 12, 13, 14, // Chunk 1 + ]; + + assert_eq!(values, expected); + } + + #[test] + fn test_iter_skip() { + let index: StateIndex = StateIndex::new(vec![10, 1, 5], 10); + + let expected: Vec<_> = (0..10).chain(10..11).chain(20..25).collect(); + // check all skips + for (i, v) in expected.iter().copied().enumerate() { + assert_eq!(index.iter().nth(i), Some(v)); + } + + assert_eq!(index.iter().nth(100), None); + + // check that iterator is correctly exhausted after calling nth + let mut iter = index.iter(); + assert_eq!(iter.nth(16), None); + assert!(iter.next().is_none()); + + let mut iter = index.iter(); + assert_eq!(iter.nth(15), Some(expected[15])); + assert!(iter.next().is_none()); + } +} diff --git a/db4-storage/src/transaction/mod.rs b/db4-storage/src/transaction/mod.rs new file mode 100644 index 0000000000..439e5b00de --- /dev/null +++ b/db4-storage/src/transaction/mod.rs @@ -0,0 +1,40 @@ +use std::sync::atomic::{self, AtomicU64}; + +use crate::wal::TransactionID; + +#[derive(Debug)] +pub struct TransactionManager { + last_transaction_id: AtomicU64, +} + +impl TransactionManager { + const STARTING_TRANSACTION_ID: TransactionID = 1; + + pub fn new() -> Self { + Self { + last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), + } + } + + /// Restores the last used transaction ID to the specified value. + /// Intended for use during recovery. + pub fn restore_transaction_id(&self, last_transaction_id: TransactionID) { + self.last_transaction_id + .store(last_transaction_id, atomic::Ordering::SeqCst) + } + + pub fn begin_transaction(&self) -> TransactionID { + self.last_transaction_id + .fetch_add(1, atomic::Ordering::SeqCst) + } + + pub fn end_transaction(&self, _transaction_id: TransactionID) { + // No-op for now.
+ } +} + +impl Default for TransactionManager { + fn default() -> Self { + Self::new() + } +} diff --git a/db4-storage/src/utils.rs b/db4-storage/src/utils.rs new file mode 100644 index 0000000000..9a28e2d86e --- /dev/null +++ b/db4-storage/src/utils.rs @@ -0,0 +1,52 @@ +use iter_enum::{ + DoubleEndedIterator, ExactSizeIterator, FusedIterator, IndexedParallelIterator, Iterator, + ParallelIterator, +}; + +#[derive( + Clone, + Debug, + Iterator, + DoubleEndedIterator, + ExactSizeIterator, + ParallelIterator, + IndexedParallelIterator, + FusedIterator, +)] +pub enum Iter2 { + I1(I1), + I2(I2), +} + +#[derive( + Copy, + Clone, + Iterator, + ExactSizeIterator, + DoubleEndedIterator, + ParallelIterator, + IndexedParallelIterator, + FusedIterator, +)] +pub enum Iter3 { + I(I), + J(J), + K(K), +} + +#[derive( + Copy, + Clone, + Iterator, + ExactSizeIterator, + DoubleEndedIterator, + ParallelIterator, + IndexedParallelIterator, + FusedIterator, +)] +pub enum Iter4 { + I(I), + J(J), + K(K), + L(L), +} diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs new file mode 100644 index 0000000000..002723af7d --- /dev/null +++ b/db4-storage/src/wal/entry.rs @@ -0,0 +1,133 @@ +use raphtory_api::core::entities::{GidRef, LayerId, properties::prop::Prop}; +use raphtory_core::{ + entities::{EID, VID}, + storage::timeindex::EventTime, +}; + +use crate::{ + error::StorageError, + wal::{GraphReplay, GraphWalOps, LSN, TransactionID, no_wal::NoWal}, +}; + +impl GraphWalOps for NoWal { + type ReplayEntry = (); + + fn log_add_edge( + &self, + _transaction_id: TransactionID, + _t: EventTime, + _src_name: Option>, + _src_id: VID, + _dst_name: Option>, + _dst_id: VID, + _eid: EID, + _layer_name: Option<&str>, + _layer_id: LayerId, + _props: Vec<(&str, usize, Prop)>, + ) -> Result { + Ok(0) + } + + fn log_add_edge_metadata( + &self, + _transaction_id: TransactionID, + _eid: EID, + _layer_id: LayerId, + _props: Vec<(&str, usize, Prop)>, + ) -> Result { + Ok(0) + } + + fn log_delete_edge( + &self, + _transaction_id: TransactionID, + _t: EventTime, + _src_name: Option>, + _src_id: VID, + _dst_name: Option>, + _dst_id: VID, + _eid: EID, + _layer_name: Option<&str>, + _layer_id: LayerId, + ) -> Result { + Ok(0) + } + + fn log_add_node( + &self, + _transaction_id: TransactionID, + _t: EventTime, + _node_name: Option>, + _node_id: VID, + _node_type_and_id: Option<(&str, usize)>, + _props: Vec<(&str, usize, Prop)>, + _layer_name: Option<&str>, + _layer_id: LayerId, + ) -> Result { + Ok(0) + } + + fn log_add_node_metadata( + &self, + _transaction_id: TransactionID, + _vid: VID, + _props: Vec<(&str, usize, Prop)>, + ) -> Result { + Ok(0) + } + + fn log_set_node_type( + &self, + _transaction_id: TransactionID, + _vid: VID, + _node_type: &str, + _node_type_id: usize, + ) -> Result { + Ok(0) + } + + fn log_add_graph_props( + &self, + _transaction_id: TransactionID, + _t: EventTime, + _props: Vec<(&str, usize, Prop)>, + ) -> Result { + Ok(0) + } + + fn log_add_graph_metadata( + &self, + _transaction_id: TransactionID, + _props: Vec<(&str, usize, Prop)>, + ) -> Result { + Ok(0) + } + + fn log_checkpoint(&self, _redo: LSN) -> Result { + Ok(0) + } + + fn log_shutdown_checkpoint(&self) -> Result { + Ok(0) + } + + fn read_checkpoint(&self, _lsn: LSN) -> Result { + Err(StorageError::GenericFailure( + "read_checkpoint is not supported for NoWAL".to_string(), + )) + } + + fn read_shutdown_checkpoint(&self, _lsn: LSN) -> Result { + Err(StorageError::GenericFailure( + "read_shutdown_checkpoint is not supported for 
NoWAL".to_string(), + )) + } + + fn replay_to_graph( + &self, + _graph: &mut G, + _start: LSN, + ) -> Result { + panic!("NoWAL does not support replay") + } +} diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs new file mode 100644 index 0000000000..e933504121 --- /dev/null +++ b/db4-storage/src/wal/mod.rs @@ -0,0 +1,256 @@ +use crate::error::StorageError; +use raphtory_api::core::entities::{GidRef, LayerId, properties::prop::Prop}; +use raphtory_core::{ + entities::{EID, GID, VID}, + storage::timeindex::EventTime, +}; + +pub mod entry; +pub mod no_wal; + +pub type LSN = u64; +pub type TransactionID = u64; + +/// Core Wal methods. +pub trait WalOps { + /// Appends data to the WAL and returns the assigned LSN. + fn append(&self, data: &[u8]) -> Result; + + /// Flushes in-memory WAL entries up to the given LSN to disk. + /// Returns immediately if the given LSN is already flushed to disk. + fn flush(&self, lsn: LSN) -> Result<(), StorageError>; + + /// Reads the WAL record at the given LSN. + /// Returns `Ok(None)` if there is no record at that LSN. + fn read(&self, lsn: LSN) -> Result, StorageError>; + + /// Returns an iterator over the entries in the wal, starting from the given LSN. + fn replay(&self, start: LSN) -> impl Iterator>; + + /// Returns the current position in the WAL stream. + fn position(&self) -> LSN; + + /// Sets the position in the WAL stream. + fn set_position(&self, lsn: LSN) -> Result<(), StorageError>; +} + +#[derive(Debug)] +pub struct ReplayRecord { + lsn: LSN, + + data: Vec, + + /// LSN immediately after this record in the WAL stream. + next_lsn: LSN, +} + +impl ReplayRecord { + pub fn new(lsn: LSN, data: Vec, next_lsn: LSN) -> Self { + Self { + lsn, + data, + next_lsn, + } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + /// Returns the LSN immediately following this record in the WAL stream. + pub fn next_lsn(&self) -> LSN { + self.next_lsn + } + + pub fn data(&self) -> &[u8] { + &self.data + } +} + +// Raphtory-specific logging & replay methods. +pub trait GraphWalOps { + /// ReplayEntry represents the type of the wal entry returned during replay. 
+ type ReplayEntry; + + fn log_add_edge( + &self, + transaction_id: TransactionID, + t: EventTime, + src_name: Option>, + src_id: VID, + dst_name: Option>, + dst_id: VID, + eid: EID, + layer_name: Option<&str>, + layer_id: LayerId, + props: Vec<(&str, usize, Prop)>, + ) -> Result; + + fn log_add_edge_metadata( + &self, + transaction_id: TransactionID, + eid: EID, + layer_id: LayerId, + props: Vec<(&str, usize, Prop)>, + ) -> Result; + + fn log_delete_edge( + &self, + transaction_id: TransactionID, + t: EventTime, + src_name: Option>, + src_id: VID, + dst_name: Option>, + dst_id: VID, + eid: EID, + layer_name: Option<&str>, + layer_id: LayerId, + ) -> Result; + + fn log_add_node( + &self, + transaction_id: TransactionID, + t: EventTime, + node_name: Option>, + node_id: VID, + node_type_and_id: Option<(&str, usize)>, + props: Vec<(&str, usize, Prop)>, + layer_name: Option<&str>, + layer_id: LayerId, + ) -> Result; + + fn log_add_node_metadata( + &self, + transaction_id: TransactionID, + vid: VID, + props: Vec<(&str, usize, Prop)>, + ) -> Result; + + fn log_set_node_type( + &self, + transaction_id: TransactionID, + vid: VID, + node_type: &str, + node_type_id: usize, + ) -> Result; + + fn log_add_graph_props( + &self, + transaction_id: TransactionID, + t: EventTime, + props: Vec<(&str, usize, Prop)>, + ) -> Result; + + fn log_add_graph_metadata( + &self, + transaction_id: TransactionID, + props: Vec<(&str, usize, Prop)>, + ) -> Result; + + /// Logs a checkpoint indicating that all LSN < `redo` are persisted. + /// On recovery, replay will start from `redo` in the WAL stream. + fn log_checkpoint(&self, redo: LSN) -> Result; + + /// Logs a shutdown checkpoint indicating a clean shutdown with all writes persisted. + fn log_shutdown_checkpoint(&self) -> Result; + + /// Reads and decodes the WAL entry at the given LSN and validates that it is a checkpoint. + /// Returns the checkpoint redo LSN, denoting where replay should start from. + fn read_checkpoint(&self, lsn: LSN) -> Result; + + /// Reads and decodes the WAL entry at the given LSN and validates that it is a shutdown checkpoint. + /// Returns the LSN immediately after this record, marking the end of the WAL stream. + fn read_shutdown_checkpoint(&self, lsn: LSN) -> Result; + + /// Replays and applies all the entries in the wal to the given graph, starting from the given LSN. + /// Returns the LSN immediately after the last entry in the WAL stream on success. + fn replay_to_graph( + &self, + graph: &mut G, + start: LSN, + ) -> Result; +} + +/// Trait for defining callbacks for replaying from wal. 
+pub trait GraphReplay {
+    fn replay_add_edge(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        t: EventTime,
+        src_name: Option<GID>,
+        src_id: VID,
+        dst_name: Option<GID>,
+        dst_id: VID,
+        eid: EID,
+        layer_name: Option<String>,
+        layer_id: LayerId,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+
+    fn replay_add_edge_metadata(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        eid: EID,
+        layer_id: LayerId,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+
+    fn replay_delete_edge(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        t: EventTime,
+        src_name: Option<GID>,
+        src_id: VID,
+        dst_name: Option<GID>,
+        dst_id: VID,
+        eid: EID,
+        layer_name: Option<String>,
+        layer_id: LayerId,
+    ) -> Result<(), StorageError>;
+
+    fn replay_add_node(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        t: EventTime,
+        node_name: Option<GID>,
+        node_id: VID,
+        node_type_and_id: Option<(String, usize)>,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+
+    fn replay_add_node_metadata(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        vid: VID,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+
+    fn replay_set_node_type(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        vid: VID,
+        node_type: String,
+        node_type_id: usize,
+    ) -> Result<(), StorageError>;
+
+    fn replay_add_graph_props(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        t: EventTime,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+
+    fn replay_add_graph_metadata(
+        &mut self,
+        lsn: LSN,
+        transaction_id: TransactionID,
+        props: Vec<(String, usize, Prop)>,
+    ) -> Result<(), StorageError>;
+}
diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs
new file mode 100644
index 0000000000..7f563080ca
--- /dev/null
+++ b/db4-storage/src/wal/no_wal.rs
@@ -0,0 +1,40 @@
+use crate::{
+    error::StorageError,
+    wal::{LSN, ReplayRecord, WalOps},
+};
+
+/// `NoWAL` is a no-op WAL implementation that discards all writes.
+/// Used for in-memory only graphs.
+#[derive(Debug, Copy, Clone)]
+pub struct NoWal;
+
+impl WalOps for NoWal {
+    fn append(&self, _data: &[u8]) -> Result<LSN, StorageError> {
+        Ok(0)
+    }
+
+    fn flush(&self, _lsn: LSN) -> Result<(), StorageError> {
+        Ok(())
+    }
+
+    fn replay(&self, _start: LSN) -> impl Iterator<Item = Result<ReplayRecord, StorageError>> {
+        let error = "Recovery is not supported for NoWAL";
+        std::iter::once(Err(StorageError::GenericFailure(error.to_string())))
+    }
+
+    fn read(&self, _lsn: LSN) -> Result<Option<ReplayRecord>, StorageError> {
+        Err(StorageError::GenericFailure(
+            "read is not supported for NoWAL".to_string(),
+        ))
+    }
+
+    fn position(&self) -> LSN {
+        0
+    }
+
+    fn set_position(&self, _lsn: LSN) -> Result<(), StorageError> {
+        Err(StorageError::GenericFailure(
+            "set_position is not supported for NoWAL".to_string(),
+        ))
+    }
+}
diff --git a/docs/reference/graphql/graphql_API.md b/docs/reference/graphql/graphql_API.md
index c7d231b158..50823287f0 100644
--- a/docs/reference/graphql/graphql_API.md
+++ b/docs/reference/graphql/graphql_API.md
@@ -9,6 +9,12 @@ hide:
 ## Query (QueryRoot)
 
+Top-level READ-only query root. Entry points for loading a graph
+(`graph`, `graphMetadata`), browsing stored graphs (`namespaces`,
+`namespace`, `root`), downloading a stored graph as a base64 blob
+(`receiveGraph`), inspecting vectorised variants (`vectorisedGraph`),
+and a few utility endpoints (`version`, `hello`, `plugins`).
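For orientation, a minimal query against this root might look like the sketch below; `"master"` reuses the example path given for the `path` arguments later in this section, and `countEdges` is the count field listed under the `Graph` type:

```text
query {
  hello        # static "hello world" liveness check
  version      # server build version string
  graph(path: "master") {   # READ required; "master" is the doc's example path
    countEdges
  }
}
```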
+ @@ -24,95 +30,161 @@ hide: - + - + + + + + + + + + + + + + + + + - + - + - + - + - + - + @@ -121,25 +193,29 @@ Returns:: List of namespaces on root - + @@ -148,7 +224,9 @@ Returns:: Root namespace @@ -157,21 +235,30 @@ Returns a plugin. - + - +
String! -Hello world demo +Liveness check — returns a static "hello world" string. Useful for +smoke-testing that the GraphQL server is reachable.
graphGraph!Graph -Returns a graph +Load a graph by path. Returns null if the graph doesn't exist or is +inaccessible. When a READ-scoped filter is attached to the caller's +permissions, that filter is applied before the graph is returned. +`graphType` lets you re-interpret the stored graph at query time — +e.g. read an event-stored graph through persistent semantics. Defaults +to the type the graph was created with. +Requires READ on the graph.
path String! + +Graph path relative to the root namespace (e.g. `"master"` or `"team/project/graph"`). + +
graphTypeGraphType + +Optional override for graph semantics — `EVENT` treats every update as a point-in-time event, `PERSISTENT` carries values forward until overwritten or deleted. Defaults to the stored graph's native type. + +
graphMetadataMetaGraph + +Returns lightweight metadata for a graph (node/edge counts, +timestamps) without deserialising the full graph. Returns null if the +graph doesn't exist or is inaccessible. +Requires READ on the graph, or INTROSPECT on its parent namespace. + +
pathString! + +Graph path relative to the root namespace. + +
updateGraph MutableGraph! -Update graph query, has side effects to update graph state - -Returns:: GqlMutableGraph +Open a graph for writing — returns a `MutableGraph` handle that can +add nodes/edges/properties/metadata. +Requires WRITE on the graph.
path String! + +Graph path relative to the root namespace. + +
vectoriseGraph Boolean! -Update graph query, has side effects to update graph state - -Returns:: GqlMutableGraph +Compute and persist embeddings for the nodes and edges of a stored +graph so it can be queried via `vectorisedGraph`. +Requires WRITE on the graph.
path String! + +Graph path relative to the root namespace. + +
model EmbeddingModel + +Optional embedding model; defaults to OpenAI's standard model. + +
nodes Template + +Optional node-document template (which fields go into each node's text representation); defaults to the built-in template. + +
edges Template + +Optional edge-document template; defaults to the built-in template. + +
vectorisedGraph VectorisedGraph -Create vectorised graph in the format used for queries - -Returns:: GqlVectorisedGraph +Open a previously-vectorised graph for similarity queries. Returns null +if the graph has no embeddings (call `vectoriseGraph` first) or is +inaccessible. +Requires READ on the graph.
path String! + +Graph path relative to the root namespace. + +
namespaces CollectionOfNamespace! -Returns all namespaces using recursive search - -Returns:: List of namespaces on root +Recursively list every namespace under the root. Each namespace is +filtered against the caller's permissions: only namespaces with at +least DISCOVER are returned.
Namespace! -Returns a specific namespace at a given path - -Returns:: Namespace or error if no namespace found +Return a specific namespace by path. Errors if no namespace exists at +that path. +Requires INTROSPECT on the namespace to browse its contents.
path String! + +Namespace path relative to the root namespace (e.g. `"team/project"`). + +
root Namespace! -Returns root namespace - -Returns:: Root namespace +Returns the root namespace. Use it as the entry point for browsing +namespaces and graphs — child listings filter against the caller's +permissions.
QueryPlugin! -Returns a plugin. +Entry point for READ-only plugins registered with the server (e.g. graph +algorithms exposed as queries). Available plugins are defined at server +startup via the plugin registry.
String! -Encodes graph and returns as string - -Returns:: Base64 url safe encoded string +Encode a stored graph as a base64 string for client-side download. If +a READ-scoped filter is attached to the caller's permissions, only the +materialised filtered view is encoded. +Requires READ on the graph.
path String! + +Graph path relative to the root namespace. + +
version String! + +Version string of the running `raphtory-graphql` server build. + +
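A hedged sketch combining the metadata and download entry points; the `name` selection on `MetaGraph` is an assumption based on its description later in this document:

```text
query {
  graphMetadata(path: "master") {
    name    # assumed MetaGraph field; the MetaGraph section below describes the type
  }
  receiveGraph(path: "master")    # base64 url-safe encoded graph blob
}
```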
@@ -201,28 +288,39 @@ Returns a collection of mutation plugins.
Boolean! -Delete graph from a path on the server. +Permanently delete a stored graph from the server. +Requires WRITE on the graph and on its parent namespace. path String! - + + +Graph path relative to the root namespace. + + newGraph Boolean! -Creates a new graph. +Create a new empty graph at the given path. Errors if a graph already +exists there. +Requires WRITE on the parent namespace. path String! - + + +Destination path relative to the root namespace. + + graphType @@ -234,154 +332,238 @@ Creates a new graph. Boolean! -Move graph from a path path on the server to a new_path on the server. - -If namespace is not provided, it will be set to the current working directory. -This applies to both the graph namespace and new graph namespace. +Move a stored graph to a new path on the server (rename / relocate). +Atomic: copies first, then deletes the source. +Requires WRITE on the source graph and on both the source and +destination namespaces. path String! - + + +Current graph path relative to the root namespace. + + newPath String! - + + +Destination path relative to the root namespace. + + + + +overwrite +Boolean + + +If true, allow replacing an existing graph at `newPath`; defaults to false. + + copyGraph Boolean! -Copy graph from a path path on the server to a new_path on the server. - -If namespace is not provided, it will be set to the current working directory. -This applies to both the graph namespace and new graph namespace. +Duplicate a stored graph to a new path on the server. Source is +preserved. +Requires READ on the source graph and WRITE on the destination namespace. path String! - + + +Source graph path relative to the root namespace. + + newPath String! - + + +Destination path relative to the root namespace. + + + + +overwrite +Boolean + + +If true, allow replacing an existing graph at `newPath`; defaults to false. + + uploadGraph String! -Upload a graph file from a path on the client using GQL multipart uploading. - -Returns:: -name of the new graph +Stream-upload a graph file using GraphQL multipart upload. The client +sends the file directly; the server stores it under `path`. +Requires WRITE on the destination namespace. path String! - + + +Destination path relative to the root namespace. + + graph Upload! - + + +Multipart upload of the serialised graph file. + + overwrite Boolean! - + + +If true, replace any graph already at `path`. + + sendGraph String! -Send graph bincode as base64 encoded string. - -Returns:: -path of the new graph +Send a serialised graph as a base64-encoded string in the request +body. Use for smaller graphs where multipart upload is overkill. +Requires WRITE on the destination namespace. path String! - + + +Destination path relative to the root namespace. + + graph String! - + + +Base64-encoded bincode of the serialised graph. + + overwrite Boolean! - + + +If true, replace any graph already at `path`. + + createSubgraph String! -Returns a subgraph given a set of nodes from an existing graph in the server. - -Returns:: -name of the new graph +Persist a subgraph of an existing stored graph as a new graph. The +subgraph contains only the listed nodes and edges between them. +Requires READ on the parent graph and WRITE on the destination namespace. parentPath String! - + + +Source graph path relative to the root namespace. + + nodes -[String!]! - +[NodeId!]! + + +Node ids to include in the subgraph. + + newPath String! - + + +Destination path relative to the root namespace. + + overwrite Boolean! 
- + + +If true, replace any graph already at `newPath`. + + createIndex Boolean! -(Experimental) Creates search index. +(Experimental) Build a Tantivy search index for a stored graph so it +can be queried via `searchNodes` / `searchEdges`. +Requires WRITE on the graph. path String! - + + +Graph path relative to the root namespace. + + indexSpec IndexSpecInput - + + +Optional spec selecting which node/edge property fields to index. Omit to index a default set. + + inRam Boolean! - + + +If true, build the index in memory (faster but lost on restart). If false, persist to disk. + + @@ -426,17 +608,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + count @@ -488,17 +682,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + count @@ -550,17 +756,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + count @@ -660,7 +878,11 @@ Errors if any of the layers do not exist. names [String!]! - + + +Layer names to include. + + excludeLayers @@ -676,7 +898,11 @@ Errors if any of the layers do not exist. names [String!]! - + + +Layer names to exclude. + + layer @@ -692,9 +918,13 @@ Errors if any of the layers do not exist. name String! - - - + + +Layer name to include. + + + + excludeLayer Edge! @@ -708,7 +938,11 @@ Errors if any of the layers do not exist. name String! - + + +Layer name to exclude. + + rolling @@ -730,17 +964,29 @@ the start of the first window and/or after the end of the last window (i.e. not window WindowDuration! - + + +Width of each window. Pass either `{epoch: <int>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + + step WindowDuration - + + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <int>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + + alignmentUnit AlignmentUnit - + + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + + expanding @@ -760,12 +1006,20 @@ e.g. "1 month and 1 day" will align at the start of the day. step WindowDuration! - + + +How much the window grows by on each step. Pass either `{epoch: <int>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}`). + + alignmentUnit AlignmentUnit - + + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`.
+ + window @@ -781,12 +1035,20 @@ For persistent graphs, any edge which exists at any point during the window will start TimeInput! - + + +Inclusive lower bound. + + end TimeInput! - + + +Exclusive upper bound. + + at @@ -800,14 +1062,21 @@ Creates a view of the Edge including all events at a specified time. time TimeInput! - + + +Instant to pin the view to. + + latest Edge! -Returns a view of the edge at the latest time of the graph. +View of this edge pinned to the graph's latest time — equivalent to +`at(graph.latestTime)`. The edge's properties and metadata show their +most recent values, and (for persistent graphs) validity is evaluated +at that instant. @@ -825,7 +1094,11 @@ This is equivalent to before(time + 1) for Graph and at(time) for PersistentGrap time TimeInput! - + + +Instant at which entities must be valid. + + snapshotLatest @@ -850,7 +1123,11 @@ Creates a view of the Edge including all events before a specified end (exclusiv time TimeInput! - + + +Exclusive upper bound. + + after @@ -864,7 +1141,11 @@ Creates a view of the Edge including all events after a specified start (exclusi time TimeInput! - + + +Exclusive lower bound. + + shrinkWindow @@ -878,12 +1159,20 @@ Shrinks both the start and end of the window. start TimeInput! - + + +Proposed new start (TimeInput); ignored if it would widen the window. + + end TimeInput! - + + +Proposed new end (TimeInput); ignored if it would widen the window. + + shrinkStart @@ -897,7 +1186,11 @@ Set the start of the window. start TimeInput! - + + +Proposed new start (TimeInput); ignored if it would widen the window. + + shrinkEnd @@ -911,7 +1204,11 @@ Set the end of the window. end TimeInput! - + + +Proposed new end (TimeInput); ignored if it would widen the window. + + applyViews @@ -925,7 +1222,11 @@ Takes a specified selection of views and applies them in given order. views [EdgeViewCollection!]! - + + +Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + + earliestTime @@ -939,7 +1240,14 @@ Returns the earliest time of an edge. firstUpdate EventTime! - + + +The timestamp of the first event in this edge's history (first update, first +deletion, or anything in between). Differs from `earliestTime` in that +`earliestTime` reports when the edge is first *valid*; `firstUpdate` reports +when its history actually begins. + + latestTime @@ -953,7 +1261,14 @@ Returns the latest time of an edge. lastUpdate EventTime! - + + +The timestamp of the last event in this edge's history (last update, last +deletion, or anything in between). Differs from `latestTime` in that +`latestTime` reports when the edge is last *valid*; `lastUpdate` reports +when its history actually ends. + + time @@ -1020,13 +1335,12 @@ Node: id -[String!]! +[NodeId!]! -Returns the id of the edge. - -Returns: -list[str]: +Returns the `[src, dst]` id pair of the edge. Each id is a `String` +for string-indexed graphs or a non-negative `Int` for integer-indexed +graphs. @@ -1157,18 +1471,31 @@ Returns: boolean filter Edge! - + + +Apply an edge filter in place, returning an edge view whose properties / +metadata / history are restricted to the matching subset. + + expr EdgeFilter! - + + +Composite edge filter (by property, layer, src/dst, etc.). + + ### EdgeSchema +Describes edges between a specific pair of node types — the property and +metadata keys seen on such edges, along with their observed value types. +One `EdgeSchema` per `(srcType, dstType)` pair per layer. 
+ @@ -1220,6 +1547,10 @@ Returns the list of metadata schemas for edges connecting these types of nodes ### EdgeWindowSet +A lazy sequence of per-window views of a single edge, produced by +`edge.rolling` / `edge.expanding`. Each entry is the edge as it exists in +that window. +
@@ -1233,7 +1564,12 @@ Returns the list of metadata schemas for edges connecting these types of nodes - + @@ -1251,28 +1587,49 @@ will be returned. - + - + - + - +
count Int! + +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Edge!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
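For instance, paging through daily windows of one edge could look like this sketch (endpoint ids are illustrative, and the duration string is quoted on the assumption that `WindowDuration`'s `duration` variant takes a string); note that a page starts at item `pageIndex * limit + offset`:

```text
{
  graph(path: "master") {
    edge(src: "a", dst: "b") {              # illustrative endpoint ids
      rolling(window: {duration: "1 day"}) {
        count
        # page start index = pageIndex * limit + offset = 2 * 10 + 0 = 20
        page(limit: 10, pageIndex: 2) {
          id
        }
      }
    }
  }
}
```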
### Edges +A lazy collection of edges from a graph view. Supports the usual view +transforms (window, layer, filter, ...), plus edge-specific ones like +`explode` and `explodeLayers`, pagination, and sorting. + @@ -1304,7 +1661,11 @@ Returns a collection containing only edges belonging to the listed layers. - + @@ -1318,7 +1679,11 @@ Returns a collection containing edges belonging to all layers except the exclude - + @@ -1332,7 +1697,11 @@ Returns a collection containing edges belonging to the specified layer. - + @@ -1346,7 +1715,11 @@ Returns a collection containing edges belonging to all layers except the exclude - + @@ -1368,17 +1741,29 @@ the start of the first window and/or after the end of the last window (i.e. not - + - + - + @@ -1398,12 +1783,20 @@ e.g. "1 month and 1 day" will align at the start of the day. - + - + @@ -1417,12 +1810,20 @@ Creates a view of the Edge including all events between the specified start (inc - + - + @@ -1436,12 +1837,20 @@ Creates a view of the Edge including all events at a specified time. - + - + @@ -1455,7 +1864,11 @@ Creates a view of the Edge including all events that are valid at time. This is - + @@ -1478,7 +1891,11 @@ Creates a view of the Edge including all events before a specified end (exclusiv - + @@ -1492,7 +1909,11 @@ Creates a view of the Edge including all events after a specified start (exclusi - + @@ -1506,12 +1927,20 @@ Shrinks both the start and end of the window. - + - + @@ -1525,7 +1954,11 @@ Set the start of the window. - + @@ -1539,7 +1972,11 @@ Set the end of the window. - + @@ -1553,14 +1990,20 @@ Takes a specified selection of views and applies them in order given. - + @@ -1580,14 +2023,19 @@ Each new edge object contains only updates from the respective layers. - + @@ -1635,17 +2083,29 @@ will be returned. - - + + - + - + @@ -1661,34 +2121,77 @@ Returns a list of all objects in the current selection of the collection. You sh - + - +
names [String!]! + +Layer names to include. + +
excludeLayers
names [String!]! + +Layer names to exclude. + +
layer
name String! + +Layer name to include. + +
excludeLayer
name String! + +Layer name to exclude. + +
rolling
window WindowDuration! + +Width of each window. Pass either `{epoch: <int>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + +
step WindowDuration + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <int>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + +
expanding
step WindowDuration! + +How much the window grows by on each step. Pass either `{epoch: <int>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}`). + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + +
window
start TimeInput! + +Inclusive lower bound. + +
end TimeInput! + +Exclusive upper bound. + +
at
time TimeInput! + +Instant to pin the view to. + +
latest Edges! + +View showing only the latest state of each edge (equivalent to `at(latestTime)`). + +
snapshotAt
time TimeInput! + +Instant at which entities must be valid. + +
snapshotLatest
time TimeInput! + +Exclusive upper bound. + +
after
time TimeInput! + +Exclusive lower bound. + +
shrinkWindow
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
shrinkStart
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
shrinkEnd
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
applyViews
views [EdgesViewCollection!]! + +Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + +
explode Edges! -Returns an edge object for each update within the original edge. +Expand each edge into one edge per update: if `A->B` has three updates, it +becomes three `A->B` entries each at a distinct timestamp. Use this to +iterate per-event rather than per-edge.
Edges! -Specify a sort order from: source, destination, property, time. You can also reverse the ordering. +Sort the edges. Multiple criteria are applied lexicographically (ties +on the first key break to the second, etc.).
sortBys [EdgeSortBy!]! + +Ordered list of sort keys. Each entry chooses exactly one of `src` / `dst` / `time` / `property`, with an optional `reverse: true` to flip order. + +
start
limit Int!
+ +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
listEdges! -Returns a filtered view that applies to list down the chain +Narrow the collection to edges matching `expr`. The filter sticks to the +returned view — every subsequent traversal through these edges (their +properties, their endpoints' neighbours, etc.) continues to see the +filtered scope. + +Useful when you want one scoping rule to apply across the whole query. +E.g. restricting everything to a specific week: + +```text +edges { filter(expr: {window: {start: 1234, end: 5678}}) { +list { src { neighbours { list { name } } } } # neighbours still windowed +} } +``` + +Contrast with `select`, which applies only at this step and is not carried through.
expr EdgeFilter! + +Composite edge filter (by property, layer, src/dst, etc.). + +
select Edges! -Returns filtered list of edges +Narrow the collection to edges matching `expr`, but only at this step — +subsequent traversals out of these edges see the unfiltered graph again. + +Useful when you want different scopes at different hops. E.g. Monday's +edges, then the neighbours of their endpoints on Tuesday, then *those* +neighbours on Wednesday: + +```text +edges { select(expr: {window: {...monday...}}) { +list { src { select(expr: {window: {...tuesday...}}) { +neighbours { select(expr: {window: {...wednesday...}}) { +neighbours { list { name } } +} } +} } } +} } +``` + +Contrast with `filter`, which persists the scope through subsequent ops.
expr EdgeFilter! + +Composite edge filter (by property, layer, src/dst, etc.). + +
### EdgesWindowSet +A lazy sequence of per-window edge collections, produced by +`edges.rolling` / `edges.expanding`. Each entry is an `Edges` collection +as it exists in that window. + @@ -1702,7 +2205,12 @@ Returns filtered list of edges - + @@ -1720,22 +2228,39 @@ will be returned. - + - + - + - +
count Int! + +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Edges!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
@@ -1797,13 +2322,21 @@ Raises an error if a time conversion fails. formatString String - + + +Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted. + + ### Graph +A view of a Raphtory graph. Every field here returns either data from the +view or a derived view (`window`, `layer`, `at`, `filter`, ...) that you can +keep chaining. Views are cheap — they don't copy the underlying data. + @@ -1820,6 +2353,9 @@ Raises an error if a time conversion fails. @@ -1828,7 +2364,10 @@ Returns the names of all layers in the graphview. @@ -1837,77 +2376,109 @@ Returns a view containing only the default layer. - + - + - + - + - - + + @@ -1916,28 +2487,36 @@ Returns a view of the graph that only includes valid edges. - + - - + + @@ -1959,17 +2538,29 @@ the start of the first window and/or after the end of the last window (i.e. not - + - + - + @@ -1989,12 +2580,20 @@ e.g. "1 month and 1 day" will align at the start of the day. - + - + @@ -2008,12 +2607,20 @@ Return a graph containing only the activity between start and end, by default ra - + - + @@ -2027,7 +2634,11 @@ Creates a view including all events at a specified time. - + @@ -2050,7 +2661,11 @@ Create a view including all events that are valid at the specified time. - + @@ -2073,7 +2688,11 @@ Create a view including all events before a specified end (exclusive). - + @@ -2087,26 +2706,39 @@ Create a view including all events after a specified start (exclusive). - + - + - + @@ -2120,7 +2752,11 @@ Set the start of the window to the larger of the specified value or current star - + @@ -2134,14 +2770,20 @@ Set the end of the window to the smaller of the specified value or current end. - + @@ -2204,28 +2846,39 @@ Returns the end time of the window. Errors if there is no window. - + - + @@ -2264,99 +2917,138 @@ Optionally takes a list of node ids to return a subset. - - + + - - + + - - + + - + - - + + - + - - + + - - + + - + @@ -2415,61 +3107,109 @@ Returns the graph schema. - + - + - - - - + + + + - + - + - + - + - + - + - + @@ -2485,67 +3225,96 @@ Export all nodes and edges from this graph view to another existing graph - + - + - + - + - + - + - +
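For example, two cheap views chained before a count (the layer name is hypothetical; `layer`, `valid`, and `countEdges` all appear in the table below):

```text
{
  graph(path: "master") {
    layer(name: "transfers") {   # hypothetical layer name
      valid {                    # chain another view on top
        countEdges
      }
    }
  }
}
```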
Returns the names of all layers in the graphview. +Distinct layer names observed in the current view — any layer that has at +least one edge event visible here. Excludes layers that exist elsewhere in +the graph but whose edges have been filtered out.
Graph! -Returns a view containing only the default layer. +View restricted to the default layer — where nodes and edges end up +when `addNode` / `addEdge` is called without a `layer` argument. +Useful for separating "unlayered" base-graph events from named-layer +ones.
Graph! -Returns a view containing all the specified layers. +View restricted to the named layers. Updates on any other layer are hidden; +if that leaves a node or edge with no updates left, it disappears from the +view.
names [String!]! + +Layer names to include. + +
excludeLayers Graph! -Returns a view containing all layers except the specified excluded layers. +View with the named layers hidden. Updates on those layers are removed; if +that leaves a node or edge with no updates left, it disappears from the +view.
names [String!]! + +Layer names to exclude. + +
layer Graph! -Returns a view containing the layer specified. +View restricted to a single layer. Convenience form of +`layers(names: [name])` — updates on any other layer are hidden, and +entities with nothing left disappear.
name String! + +Layer name to include. + +
excludeLayer Graph! -Returns a view containing all layers except the specified excluded layer. +View with one layer hidden. Convenience form of +`excludeLayers(names: [name])` — updates on that layer are removed, and +entities with nothing left disappear.
name String! + +Layer name to exclude. + +
subgraph Graph! -Returns a subgraph of a specified set of nodes which contains only the edges that connect nodes of the subgraph to each other. +View restricted to a chosen set of nodes and the edges between them. Edges +connecting a selected node to a non-selected node are hidden.
nodes[String!]![NodeId!]! + +Node ids to keep. + +
valid Graph! -Returns a view of the graph that only includes valid edges. +View containing only valid edges — for persistent graphs this drops edges +whose most recent event is a deletion at the latest time of the current +view (a later re-addition would keep them). On event graphs this is a +no-op.
Graph! -Returns a subgraph filtered by the specified node types. +View restricted to nodes with the given node types.
nodeTypes [String!]! + +Node types to include. + +
excludeNodes Graph! -Returns a subgraph containing all nodes except the specified excluded nodes. +View with a set of nodes removed (along with any edges touching them).
nodes[String!]![NodeId!]! + +Node ids to exclude. + +
rolling
window WindowDuration! + +Width of each window. Pass either `{epoch: <int>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + +
step WindowDuration + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <int>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + +
expanding
step WindowDuration! + +How much the window grows by on each step. Pass either `{epoch: <int>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}`). + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + +
window
start TimeInput! + +Inclusive lower bound. + +
end TimeInput! + +Exclusive upper bound. + +
at
time TimeInput! + +Instant to pin the view to. + +
latest
time TimeInput! + +Instant at which entities must be valid. + +
snapshotLatest
time TimeInput! + +Exclusive upper bound. + +
after
time TimeInput! + +Exclusive lower bound. + +
shrinkWindow Graph! -Shrink both the start and end of the window. +Shrink both the start and end of the window. The new bounds are taken as the +intersection with the current window; this never widens the view.
start TimeInput! + +Proposed new start (TimeInput); ignored if before the current start. + +
end TimeInput! + +Proposed new end (TimeInput); ignored if after the current end. + +
shrinkStart
start TimeInput! + +Proposed new start (TimeInput); has no effect if it would widen the window. + +
shrinkEnd
end TimeInput! + +Proposed new end (TimeInput); has no effect if it would widen the window. + +
created Int! -Returns the timestamp for the creation of the graph. +Filesystem creation timestamp (epoch millis) of the graph's on-disk folder +— i.e. when this graph was first saved to the server, not when its earliest +event occurred. Use `earliestTime` for the latter.
EventTime! -Returns the earliest time that any edge in this graph is valid. +The earliest time at which any edge in this graph is valid. + +* `includeNegative` — if false, edge events with a timestamp `< 0` are +skipped when computing the minimum. Defaults to true.
includeNegative Boolean + +If false, edge events with a timestamp `< 0` are skipped when computing the minimum. Defaults to true. + +
latestEdgeTime EventTime! -Returns the latest time that any edge in this graph is valid. +The latest time at which any edge in this graph is valid.
includeNegative Boolean + +If false, edge events with a timestamp `< 0` are skipped when computing the maximum. Defaults to true. + +
countEdgesBoolean! -Returns true if the graph contains the specified node. +Returns true if a node with the given id exists in this view.
nameString!NodeId! + +Node id to look up. + +
hasEdge Boolean! -Returns true if the graph contains the specified edge. Edges are specified by providing a source and destination node id. You can restrict the search to a specified layer. +Returns true if an edge exists between `src` and `dst` in this view, optionally +restricted to a single layer.
srcString!NodeId! + +Source node id. + +
dstString!NodeId! + +Destination node id. + +
layer String + +Optional; if provided, only checks whether the edge exists on this layer. If null or omitted, any layer counts. + +
node Node -Gets the node with the specified id. +Look up a single node by id. Returns null if the node doesn't exist in this +view.
nameString!NodeId! + +Node id. + +
nodes Nodes! -Gets (optionally a subset of) the nodes in the graph. +All nodes in this view, optionally narrowed by a filter.
select NodeFilter + +Optional node filter (by name, property, type, etc.). If omitted, every node in the view is returned. + +
edge Edge -Gets the edge with the specified source and destination nodes. +Look up a single edge by its endpoint ids. Returns null if no edge exists +between `src` and `dst` in this view.
srcString!NodeId! + +Source node id. + +
dstString!NodeId! + +Destination node id. + +
edges Edges! -Gets the edges in the graph. +All edges in this view, optionally narrowed by a filter.
select EdgeFilter + +Optional edge filter (by property, layer, src/dst, etc.). If omitted, every edge in the view is returned. + +
properties
algorithms GraphAlgorithmPlugin! + +Access registered graph algorithms (PageRank, shortest path, etc.) for this +graph view. The set of available algorithms is defined by the plugin registry +loaded at server startup. + +
sharedNeighbours [Node!]! + +Nodes that are neighbours of every node in `selectedNodes`. Returns the +intersection of each selected node's neighbour set (undirected). + +
selectedNodes[String!]!
[NodeId!]! + +Node ids whose common neighbours you want. Returns an empty list if `selectedNodes` is empty or any id does not exist. + +
exportTo Boolean! -Export all nodes and edges from this graph view to another existing graph +Copy all nodes and edges of the current graph view into another +already-existing graph stored on the server. The destination graph is preserved +— this only adds; it does not replace.
path String! + +Destination graph path relative to the root namespace. + +
filter Graph! + +Returns a filtered view of the graph. Applies a mixed node/edge filter +expression and narrows nodes, edges, and their properties to what matches. + +
expr GraphFilter + +Optional composite filter combining node, edge, property, and metadata conditions. If omitted, applies the identity filter (equivalent to no filtering). + +
filterNodes Graph! + +Returns a graph view restricted to nodes that match the given filter; edges +are kept only if both endpoints survive. + +
expr NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
filterEdges Graph! + +Returns a graph view restricted to edges that match the given filter. Nodes +remain in the view even if all their edges are filtered out. + +
expr EdgeFilter! + +Composite edge filter (by property, layer, src/dst, etc.). + +
getIndexSpec[Node!]! -(Experimental) Searches for nodes which match the given filter expression. - -Uses Tantivy's exact search. +(Experimental) Searches for nodes which match the given filter +expression. Uses Tantivy's exact search; requires the graph to have +been indexed.
filter NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
limit Int! + +Maximum number of nodes to return. + +
offset Int! + +Number of matches to skip before returning results. + +
searchEdges [Edge!]! -(Experimental) Searches the index for edges which match the given filter expression. - -Uses Tantivy's exact search. +(Experimental) Searches the index for edges which match the given +filter expression. Uses Tantivy's exact search; requires the graph to +have been indexed.
filter EdgeFilter! + +Composite edge filter (by property, layer, src/dst, etc.). + +
limit Int! + +Maximum number of edges to return. + +
offset Int! + +Number of matches to skip before returning results. + +
applyViews Graph! -Returns the specified graph view or if none is specified returns the default view. -This allows you to specify multiple operations together. +Apply a list of view operations in the given order and return the +resulting graph view. Lets callers compose multiple view transforms +(window, layer, filter, snapshot, ...) in a single call.
views [GraphViewCollection!]! + +Ordered list of view operations; each entry is a one-of variant applied to the running result. + +
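A hedged sketch of composing views in one call; the one-of variant shapes for `window` and `layer` are assumptions modelled on the `{window: {start, end}}` style used in the `filter` example earlier in this document:

```text
{
  graph(path: "master") {
    applyViews(views: [
      {window: {start: 1234, end: 5678}},   # assumed variant shape
      {layer: {name: "transfers"}}          # assumed variant shape
    ]) {
      countEdges
    }
  }
}
```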
@@ -2632,6 +3401,11 @@ This allows you to specify multiple operations together. ### GraphWindowSet +A lazy sequence of graph snapshots produced by `rolling` or `expanding`. +Each entry is a `Graph` at a different window over time. Iterate via +`list` / `page` (or count with `count`). Subsequent view ops apply +per-window. + @@ -2647,7 +3421,8 @@ This allows you to specify multiple operations together. @@ -2667,22 +3442,39 @@ will be returned. - + - + - + - +
Int! -Returns the number of items. +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating.
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Graph!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
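Because subsequent view ops apply per window, a sketch like the one below counts edges in each daily snapshot without materialising the whole set (the quoted duration string is an assumption, as above):

```text
{
  graph(path: "master") {
    rolling(window: {duration: "1 day"}) {
      count
      page(limit: 7, pageIndex: 0) {
        countEdges    # evaluated separately inside each window
      }
    }
  }
}
```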
@@ -2754,17 +3546,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + pageRev @@ -2782,17 +3586,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + isEmpty @@ -2837,7 +3653,11 @@ Refer to chrono::format::strftime for formatting specifiers and escape sequences formatString String - + + +Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted. + + eventId @@ -2854,7 +3674,10 @@ They are used for ordering within the same timestamp. Intervals! -Returns an Intervals object which calculates the intervals between consecutive EventTime timestamps. +Inter-event gap analysis for this history. The returned `Intervals` +object exposes each gap (in milliseconds) between consecutive events, +plus summary statistics — `min` / `max` / `mean` / `median` — and +paginated access via `list` / `listRev` / `page` / `pageRev`. @@ -2889,7 +3712,11 @@ will be raised on time conversion error. Defaults to False. filterBroken Boolean - + + +If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false. + + listRev @@ -2905,7 +3732,11 @@ will be raised on time conversion error. Defaults to False. filterBroken Boolean - + + +If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false. + + page @@ -2925,22 +3756,38 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + filterBroken Boolean - + + +If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false. + + pageRev @@ -2960,22 +3807,38 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + filterBroken Boolean - + + +If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false. + + @@ -3028,17 +3891,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + pageRev @@ -3056,17 +3931,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). 
+ + @@ -3119,17 +4006,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + pageRev @@ -3147,17 +4046,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + @@ -3261,18 +4172,30 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + + pageRev [Int!]! @@ -3289,17 +4212,29 @@ will be returned. limit Int! - + + +Maximum number of items to return on this page. + + offset Int - + + +Extra items to skip on top of `pageIndex` paging (default 0). + + pageIndex Int - + + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + + mean @@ -3342,6 +4277,9 @@ Compute the minimum interval between consecutive timestamps. Returns None if few ### LayerSchema +Describes a single edge layer — its name and the per `(srcType, dstType)` +edge schemas observed within it. + @@ -3375,6 +4313,11 @@ Returns the list of edge schemas for this edge layer ### MetaGraph +Lightweight summary of a stored graph — its name, path, counts, and +filesystem timestamps — served without deserializing the full graph. +Useful for listing what's available on the server before committing to a +full load. +
@@ -3465,6 +4408,11 @@ Returns the metadata of the graph. ### Metadata +Constant key/value metadata attached to an entity (node, edge, or graph). +Metadata has no timeline — each key maps to exactly one value for the +lifetime of the entity. Separate from `Properties`, which carries +time-varying data. +
@@ -3480,35 +4428,44 @@ Returns the metadata of the graph. - + - + @@ -3517,20 +4474,28 @@ Return all metadata keys. - +
Property -Get metadata value matching the specified key. +Look up a single metadata value by key. Returns null if no metadata with that +key exists.
key String! + +The metadata name. + +
contains Boolean! -/// Check if the key is in the metadata. +Returns true if a metadata entry with the given key exists.
key String! + +The metadata name to look up. + +
keys [String!]! -Return all metadata keys. +All metadata keys present on this entity.
[Property!]! -/// Return all metadata values. +All metadata values as `{key, value}` entries.
keys [String!] + +Optional whitelist. If provided, only metadata with these keys is returned; if omitted, every metadata entry is returned. + +
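A hedged sketch of reading metadata off a graph; the `metadata` field name is implied by the hunk context above, while `get` and the `value` selection on `Property` are assumptions:

```text
{
  graph(path: "master") {
    metadata {                       # graph-level metadata
      keys
      contains(key: "source")        # "source" is an illustrative key
      get(key: "source") { value }   # `get` and `value` are assumed names
    }
  }
}
```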
### MutableEdge +Write-side handle for a single edge — returned from `addEdge` or +`MutableGraph.edge`. Supports adding updates, deletions, and attaching +or updating metadata. + @@ -3582,96 +4547,137 @@ Get the mutable destination node of the edge. - - + + - + - + - + - + - + - - + + - + - +
Boolean! -Mark the edge as deleted at time time. +Mark this edge as deleted at the given time. Persistent graphs treat this +as a tombstone (the edge becomes invalid from `time` onwards); event +graphs simply log the deletion event.
timeInt!TimeInput! + +Time of the deletion. + +
layer String + +Optional layer name. If omitted, uses the layer the edge was originally added on (when called after `addEdge`). + +
addMetadata Boolean! -Add metadata to the edge (errors if the value already exists). - -If this is called after add_edge, the layer is inherited from the add_edge and does not -need to be specified again. +Add metadata to this edge. Errors if any of the keys already exists — +use `updateMetadata` to overwrite. If this is called after `addEdge`, +the layer is inherited and does not need to be specified again.
properties [PropertyInput!]! + +List of `{key, value}` pairs to set as metadata. + +
layer String + +Optional layer name; defaults to the inherited layer. + +
updateMetadata Boolean! -Update metadata of the edge (existing values are overwritten). - -If this is called after add_edge, the layer is inherited from the add_edge and does not -need to be specified again. +Update metadata of this edge, overwriting any existing values for the +given keys. If this is called after `addEdge`, the layer is inherited +and does not need to be specified again.
properties [PropertyInput!]! + +List of `{key, value}` pairs to upsert. + +
layer String + +Optional layer name; defaults to the inherited layer. + +
addUpdates Boolean! -Add temporal property updates to the edge. - -If this is called after add_edge, the layer is inherited from the add_edge and does not -need to be specified again. +Append a property update to this edge at a specific time. If called +after `addEdge`, the layer is inherited and does not need to be +specified again.
timeInt!TimeInput! + +Time of the update. + +
properties [PropertyInput!] + +Optional `{key, value}` pairs attached to the event. + +
layer String + +Optional layer name; defaults to the inherited layer. + +
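Putting the layer-inheritance rule together, a sketch of one write path (ids, time, and the property key are illustrative; `PropertyInput` is assumed to take the `{key, value}` shape described above):

```text
mutation {
  updateGraph(path: "master") {
    addEdge(time: 1, src: "a", dst: "b", layer: "transfers") {
      # layer "transfers" is inherited from addEdge, so none is passed here
      addMetadata(properties: [{key: "weight", value: 1.0}])
    }
  }
}
```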
### MutableGraph +Write-enabled handle for a graph. Obtained by calling `updateGraph(path)` +on the root query with a path you have write permission for. Supports +adding nodes and edges (individually or in batches), attaching +properties/metadata, and looking up mutable `node`/`edge` handles. Use the +read-only `graph(path)` resolver for queries. + @@ -3687,7 +4693,20 @@ need to be specified again. + + + + + @@ -3696,235 +4715,378 @@ Get the non-mutable graph. - - + + - - + + - - + + - + - + + + + + + - - + + - - + + - + - + + + + + + - + - - + + - - + + - - + + - - + + - - + + - + - + - + - - + + - - + + - - + + - + - - + + - + - + - +
Graph! -Get the non-mutable graph. +Read-only view of this graph — identical to what you'd get from +`graph(path:)` on the query root. Use this when you want to compose +queries on the graph you've just mutated. `graphType` lets you +re-interpret the graph at query time (see `graph(path:)` for +semantics); defaults to the stored graph's native type. + +
graphTypeGraphType + +Optional override for graph semantics — `EVENT` treats every update as a point-in-time event, `PERSISTENT` carries values forward until overwritten or deleted. Defaults to the stored graph's native type.
MutableNode -Get mutable existing node. +Look up an existing node for mutation. Returns null if the node doesn't +exist; use `addNode` or `createNode` to create one.
nameString!NodeId! + +Node id. + +
addNode MutableNode! -Add a new node or add updates to an existing node. +Add a new node or append an update to an existing one. Upsert semantics: +no error if the node already exists — properties and type are merged.
timeInt!TimeInput! + +Time of the event. + +
nameString!NodeId! + +Node id. + +
properties [PropertyInput!] + +Optional property updates attached to this event. + +
nodeType String + +Optional node type to assign. If provided, sets the node's type at this event. + +
layerString + +Optional layer name. If omitted, the default layer is used. + +
createNode MutableNode! -Create a new node or fail if it already exists. +Create a new node or fail if it already exists. Strict alternative to +`addNode` — use this when you want to detect collisions.
timeInt!TimeInput! + +Time of the create event. + +
nameString!NodeId! + +Node id. + +
properties [PropertyInput!] + +Optional property updates attached to this event. + +
nodeType String + +Optional node type to assign. If provided, sets the node's type at this event. + +
layerString + +Optional layer name. If omitted, the default layer is used. + +
addNodes Boolean! -Add a batch of nodes. +Batch-add multiple nodes in one call. For each `NodeAddition`, applies every +update it carries (time/properties pairs), then optionally sets its node type +and adds any metadata. On partial failure, returns a `BatchFailures` error +describing which entries failed and why; otherwise returns true.
nodes [NodeAddition!]! + +List of `NodeAddition` inputs, each specifying a node's name, optional type, layer, per-timestamp updates, and metadata. + +
edge MutableEdge -Get a mutable existing edge. +Look up an existing edge for mutation. Returns null if no such edge exists.
srcString!NodeId! + +Source node id. + +
dstString!NodeId! + +Destination node id. + +
addEdge MutableEdge! -Add a new edge or add updates to an existing edge. +Add a new edge or append an update to an existing one. Upsert semantics: +safe to call on an edge that already exists — creates missing endpoints if +needed.
timeInt!TimeInput! + +Time of the event. + +
srcString!NodeId! + +Source node id. + +
dstString!NodeId! + +Destination node id. + +
properties [PropertyInput!] + +Optional property updates attached to this event. + +
layer String + +Optional layer name. If omitted, the default layer is used. + +
addEdges Boolean! -Add a batch of edges. +Batch-add multiple edges in one call. For each `EdgeAddition`, applies every +update it carries, then adds any metadata. On partial failure, returns a +`BatchFailures` error describing which entries failed; otherwise returns +true.
edges [EdgeAddition!]! + +List of `EdgeAddition` inputs, each specifying an edge's `src`, `dst`, optional layer, per-timestamp updates, and metadata. + +
deleteEdge MutableEdge! -Mark an edge as deleted (creates the edge if it did not exist). +Mark an edge as deleted at the given time. Persistent graphs treat this +as a tombstone (the edge becomes invalid from `time` onwards); event +graphs simply log the deletion event. Creates the edge first if it did +not exist.
timeInt!TimeInput! + +Time of the deletion. + +
srcString!NodeId! + +Source node id. + +
dstString!NodeId! + +Destination node id. + +
layer String + +Optional layer name. If omitted, the default layer is used. + +
addProperties Boolean! -Add temporal properties to graph. +Add temporal properties to the graph itself (not a node or edge). Each +call records a property update at `t`.
tInt!TimeInput! + +Time of the update. + +
properties [PropertyInput!]! + +List of `{key, value}` pairs to set. + +
addMetadata Boolean! -Add metadata to graph (errors if the property already exists). +Add metadata to the graph itself. Errors if any of the keys already +exists — use `updateMetadata` to overwrite.
properties [PropertyInput!]! + +List of `{key, value}` pairs to set as metadata. + +
updateMetadata Boolean! -Update metadata of the graph (overwrites existing values). +Update metadata of the graph itself, overwriting any existing values for +the given keys.
properties [PropertyInput!]! + +List of `{key, value}` pairs to upsert. + +
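A hedged sketch of the batch path; the exact input field names on `NodeAddition` and `EdgeAddition` (`name`, `updates`, `time`, ...) are assumptions inferred from the descriptions above:

```text
mutation {
  updateGraph(path: "master") {
    addNodes(nodes: [
      {name: "a", nodeType: "wallet", updates: [{time: 1}]},   # assumed input shape
      {name: "b", updates: [{time: 2}]}
    ])
    addEdges(edges: [
      {src: "a", dst: "b", updates: [{time: 2, properties: [{key: "weight", value: 1.0}]}]}
    ])
  }
}
```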
### MutableNode +Write-side handle for a single node — returned from `addNode`, `createNode`, +or `MutableGraph.node`. Supports adding updates, setting node type, and +attaching or updating metadata. + @@ -3958,61 +5120,93 @@ Get the non-mutable Node. - + - + - + - - + + - + + + + + +
Boolean! -Add metadata to the node (errors if the property already exists). +Add metadata to this node. Errors if any of the keys already exists — +use `updateMetadata` to overwrite.
properties [PropertyInput!]! + +List of `{key, value}` pairs to set as metadata. + +
setNodeType Boolean! -Set the node type (errors if the node already has a non-default type). +Set this node's type. Errors if the node already has a non-default +type and you're trying to change it.
newType String! + +Node-type name to assign. + +
updateMetadata Boolean! -Update metadata of the node (overwrites existing property values). +Update metadata of this node, overwriting any existing values for the +given keys.
properties [PropertyInput!]! + +List of `{key, value}` pairs to upsert. + +
addUpdates Boolean! -Add temporal property updates to the node. +Append a property update to this node at a specific time.
timeInt!TimeInput! + +Time of the update. + +
properties [PropertyInput!] + +Optional `{key, value}` pairs attached to the event. + +
layerString + +Optional layer name. If omitted, the default layer is used. + +
@@ -4039,6 +5233,11 @@ Add temporal property updates to the node. ### Namespace +A directory-like container for graphs and nested namespaces. Graphs are +addressed by path (e.g. `"team/project/graph"`), and every segment except +the last is a namespace. Use to browse what's stored on the server without +loading any graph data. + @@ -4052,27 +5251,53 @@ Add temporal property updates to the node. - + - + - + - + - +
graphs CollectionOfMetaGraph! + +Graphs directly inside this namespace (excludes graphs in nested +namespaces). Filtered by the caller's permissions — only graphs the +caller is allowed to see are returned. + +
path String! + +Path of this namespace relative to the root namespace. Empty string for +the root namespace itself. + +
parent Namespace + +Parent namespace, or null at the root. + +
children CollectionOfNamespace! + +Sub-namespaces directly inside this one (one level down, not recursive). +Filtered by permissions. + +
items CollectionOfNamespacedItem! + +Everything in this namespace — sub-namespaces and graphs — as a single +heterogeneous collection. Sub-namespaces are listed before graphs. +Filtered by permissions. + +
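A sketch of browsing the tree; the `namespace(path: ...)` root field and the `list` accessor on the collection types are assumptions for illustration: ```text query {
  namespace(path: "team/project") {   # root field assumed for illustration
    path
    children { list { path } }        # one level of sub-namespaces
    graphs { list { name } }          # graphs directly in this namespace
  }
}
```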
@@ -4093,10 +5318,11 @@ Raphtory graph node. id -String! +NodeId! -Returns the unique id of the node. +Returns the unique id of the node — `String` for string-indexed +graphs, non-negative `Int` for integer-indexed graphs. @@ -4130,7 +5356,11 @@ Return a view of node containing all layers specified. names [String!]! - + + +Layer names to include. + + excludeLayers @@ -4144,7 +5374,11 @@ Returns a collection containing nodes belonging to all layers except the exclude names [String!]! - + + +Layer names to exclude. + + layer @@ -4158,7 +5392,11 @@ Returns a collection containing nodes belonging to the specified layer. name String! - + + +Layer name to include. + + excludeLayer @@ -4172,7 +5410,11 @@ Returns a collection containing nodes belonging to all layers except the exclude name String! - + + +Layer name to exclude. + + rolling @@ -4194,17 +5436,29 @@ the start of the first window and/or after the end of the last window (i.e. not window WindowDuration! - + + +Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + + step WindowDuration - + + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + + alignmentUnit AlignmentUnit - + + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + + expanding @@ -4224,12 +5478,20 @@ e.g. "1 month and 1 day" will align at the start of the day. step WindowDuration! - + + +How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + + alignmentUnit AlignmentUnit - + + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + + window @@ -4243,12 +5505,20 @@ Create a view of the node including all events between the specified start (incl start TimeInput! - + + +Inclusive lower bound. + + end TimeInput! - + + +Exclusive upper bound. + + at @@ -4262,7 +5532,11 @@ Create a view of the node including all events at a specified time. time TimeInput! - + + +Instant to pin the view to. + + latest @@ -4285,7 +5559,11 @@ Create a view of the node including all events that are valid at the specified t time TimeInput! - + + +Instant at which entities must be valid. + + snapshotLatest @@ -4308,7 +5586,11 @@ Create a view of the node including all events before specified end time (exclus time TimeInput! - + + +Exclusive upper bound. + + after @@ -4322,7 +5604,11 @@ Create a view of the node including all events after the specified start time (e time TimeInput! - + + +Exclusive lower bound. + + shrinkWindow @@ -4336,12 +5622,20 @@ Shrink a Window to a specified start and end time, if these are earlier and late start TimeInput! - + + +Proposed new start (TimeInput); ignored if it would widen the window. + + end TimeInput! - + + +Proposed new end (TimeInput); ignored if it would widen the window. 
+ + shrinkStart @@ -4355,7 +5649,11 @@ Set the start of the window to the larger of a specified start time and self.sta start TimeInput! - + + +Proposed new start (TimeInput); ignored if it would widen the window. + + shrinkEnd @@ -4369,7 +5667,11 @@ Set the end of the window to the smaller of a specified end and self.end(). end TimeInput! - + + +Proposed new end (TimeInput); ignored if it would widen the window. + + applyViews @@ -4625,6 +5927,10 @@ Returns the number of neighbours that have at least one out-going edge from this ### NodeSchema +Describes nodes of a specific type in a graph — its property keys and +observed value types (and, for string-valued properties, the set of +distinct values seen). One `NodeSchema` per node type. + @@ -4638,27 +5944,43 @@ Returns the number of neighbours that have at least one out-going edge from this - + - +
typeName String! + +The node type this schema describes (e.g. `"person"`, `"org"`). +Falls back to the default node type for untyped nodes. + +
properties [PropertySchema!]! -Returns the list of property schemas for this node +Property schemas seen on nodes of this type — one entry per property key +ever set on a node of this type, with its observed `PropertyType` and (for +string-valued properties) the set of distinct values.
metadata [PropertySchema!]! + +Metadata schemas seen on nodes of this type — like `properties`, but +covering metadata fields rather than temporal properties. + +
### NodeWindowSet +A lazy sequence of per-window views of a single node, produced by +`node.rolling` / `node.expanding`. Each entry is the node as it exists in +that window. + @@ -4672,7 +5994,12 @@ Returns the list of property schemas for this node - + @@ -4690,28 +6017,49 @@ will be returned. - + - + - + - +
count Int! + +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Node!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
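For example, paging through daily windows of one node; the `graph`/`node` root fields are assumed, and the `{duration: "1 day"}` form follows the `WindowDuration` notes below: ```text query {
  graph(path: "some/graph") {                  # root field assumed
    node(name: "alice") {
      rolling(window: {duration: "1 day"}) {
        count                                  # number of windows
        page(limit: 10, pageIndex: 0) { name } # first ten windowed views of the node
      }
    }
  }
}
```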
### Nodes +A lazy collection of nodes from a graph view. Supports all the same view +transforms as `Graph` (window, layer, filter, ...) plus pagination and +sorting. Iterated via `list` / `page` / `ids` / `count`. + @@ -4743,7 +6091,11 @@ Return a view of the nodes containing all layers specified. - + @@ -4757,7 +6109,11 @@ Return a view of the nodes containing all layers except those specified. - + @@ -4771,7 +6127,11 @@ Return a view of the nodes containing the specified layer. - + @@ -4785,7 +6145,11 @@ Return a view of the nodes containing all layers except those specified. - + @@ -4807,17 +6171,29 @@ the start of the first window and/or after the end of the last window (i.e. not - + - + - + @@ -4837,12 +6213,20 @@ e.g. "1 month and 1 day" will align at the start of the day. - + - + @@ -4856,12 +6240,20 @@ Create a view of the node including all events between the specified start (incl - + - + @@ -4875,7 +6267,11 @@ Create a view of the nodes including all events at a specified time. - + @@ -4898,7 +6294,11 @@ Create a view of the nodes including all events that are valid at the specified - + @@ -4921,7 +6321,11 @@ Create a view of the nodes including all events before specified end time (exclu - + @@ -4935,7 +6339,11 @@ Create a view of the nodes including all events after the specified start time ( - + @@ -4949,12 +6357,20 @@ Shrink both the start and end of the window. - + - + @@ -4968,7 +6384,11 @@ Set the start of the window to the larger of a specified start time and self.sta - + @@ -4982,7 +6402,11 @@ Set the end of the window to the smaller of a specified end and self.end(). - + @@ -4996,27 +6420,50 @@ Filter nodes by node type. - + - + - + - + - + @@ -5039,7 +6486,11 @@ Returns the end time of the window. Errors if there is no window. - + @@ -5057,29 +6508,47 @@ will be returned. - + - + - + - + @@ -5088,34 +6557,76 @@ Returns a view of the node ids. - + - +
names [String!]! + +Layer names to include. + +
excludeLayers
names [String!]! + +Layer names to exclude. + +
layer
name String! + +Layer name to include. + +
excludeLayer
name String! + +Layer name to exclude. + +
rolling
window WindowDuration! + +Width of each window. Pass either `{epoch: <millis>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: "1 day"}` or `{duration: "2 hours and 30 minutes"}`). + +
step WindowDuration + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <millis>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + +
expanding
step WindowDuration! + +How much the window grows by on each step. Pass either `{epoch: <millis>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: "1 day"}`). + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + +
window
start TimeInput! + +Inclusive lower bound. + +
end TimeInput! + +Exclusive upper bound. + +
at
time TimeInput! + +Instant to pin the view to. + +
latest
time TimeInput! + +Instant at which entities must be valid. + +
snapshotLatest
time TimeInput! + +Exclusive upper bound. + +
after
time TimeInput! + +Exclusive lower bound. + +
shrinkWindow
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
shrinkStart
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
shrinkEnd
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
typeFilter
nodeTypes [String!]! + +Node-type names to keep. + +
applyViews Nodes! + +Apply a list of views in the given order and return the resulting nodes +collection. Lets callers compose window, layer, filter, and snapshot +operations in a single call. + +
views [NodesViewCollection!]! + +Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, etc.) applied to the running result. + +
sorted Nodes! + +Sort the nodes. Multiple criteria are applied lexicographically (ties on the +first key break to the second, etc.). + +
sortBys [NodeSortBy!]! + +Ordered list of sort keys. Each entry chooses exactly one of `id` / `time` / `property`, with an optional `reverse: true` to flip order. + +
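A sketch of a two-key sort using the one-of shape described above; the exact `property`/`id` variant spellings follow that description and are otherwise an assumption: ```text query {
  graph(path: "some/graph") {             # root field assumed
    nodes {
      sorted(sortBys: [
        {property: "age", reverse: true}, # primary key: age, descending
        {id: true}                        # tie-break on node id
      ]) {
        page(limit: 20) { name }
      }
    }
  }
}
```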
start
count Int! + +Number of nodes in the current view. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Node!]! + +Materialise every node in the view. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
ids [String!]! -Returns a view of the node ids. +Every node's id (name) as a flat list of strings. Rejected by the server when +bulk list endpoints are disabled.
Nodes! -Returns a filtered view that applies to list down the chain +Narrow the collection to nodes matching `expr`. The filter sticks to the +returned view — every subsequent traversal through these nodes (their +neighbours, edges, properties) continues to see the filtered scope. + +Useful when you want one scoping rule to apply across the whole query. +E.g. restricting everything to a specific week: + +```text +nodes { filter(expr: {window: {start: 1234, end: 5678}}) { +list { neighbours { list { name } } } # neighbours still windowed +} } +``` + +Contrast with `select`, which applies here and is not carried through.
expr NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
select Nodes! -Returns filtered list of nodes +Narrow the collection to nodes matching `expr`, but only at this step — +subsequent traversals out of these nodes see the unfiltered graph again. + +Useful when you want different scopes at different hops. E.g. nodes +active on Monday, then their neighbours active on Tuesday, then *those* +neighbours active on Wednesday: + +```text +nodes { select(expr: {window: {...monday...}}) { +list { neighbours { select(expr: {window: {...tuesday...}}) { +list { neighbours { select(expr: {window: {...wednesday...}}) { +list { name } +} } } +} } } +} } +``` + +Contrast with `filter`, which persists the scope through subsequent ops.
expr NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
### NodesWindowSet +A lazy sequence of per-window node collections, produced by +`nodes.rolling` / `nodes.expanding`. Each entry is a `Nodes` collection +as it exists in that window. + @@ -5129,7 +6640,12 @@ Returns filtered list of nodes - + @@ -5147,22 +6663,39 @@ will be returned. - + - + - + - +
count Int! + +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Nodes!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
@@ -5196,6 +6729,11 @@ PageRank score. ### PathFromNode +A collection of nodes anchored to a source node — the result of traversals +like `node.neighbours`, `inNeighbours`, or `outNeighbours`. Supports all +the usual view transforms (window, layer, filter, ...) and can be chained +to walk further hops. + @@ -5218,7 +6756,11 @@ Returns a view of PathFromNode containing the specified layer, errors if the lay - + @@ -5232,7 +6774,11 @@ Return a view of PathFromNode containing all layers except the specified exclude - + @@ -5246,7 +6792,11 @@ Return a view of PathFromNode containing the layer specified layer, errors if th - + @@ -5260,7 +6810,11 @@ Return a view of PathFromNode containing all layers except the specified exclude - + @@ -5282,17 +6836,29 @@ the start of the first window and/or after the end of the last window (i.e. not - + - + - + @@ -5312,12 +6878,20 @@ e.g. "1 month and 1 day" will align at the start of the day. - + - + @@ -5331,12 +6905,20 @@ Create a view of the PathFromNode including all events between a specified start - + - + @@ -5350,7 +6932,11 @@ Create a view of the PathFromNode including all events at time. - + @@ -5373,7 +6959,11 @@ Create a view of the PathFromNode including all events that are valid at the spe - + @@ -5396,7 +6986,11 @@ Create a view of the PathFromNode including all events before the specified end - + @@ -5410,7 +7004,11 @@ Create a view of the PathFromNode including all events after the specified start - + @@ -5424,12 +7022,20 @@ Shrink both the start and end of the window. - + - + @@ -5443,7 +7049,11 @@ Set the start of the window to the larger of the specified start and self.start( - + @@ -5457,21 +7067,29 @@ Set the end of the window to the smaller of the specified end and self.end(). - + - + @@ -5494,7 +7112,11 @@ Returns the latest time that this PathFromNode is valid or None if the PathFromN - + @@ -5512,29 +7134,47 @@ will be returned. - + - + - + - + @@ -5550,41 +7190,84 @@ Takes a specified selection of views and applies them in given order. - + - + - +
names [String!]! + +Layer names to include. + +
excludeLayers
names [String!]! + +Layer names to exclude. + +
layer
name String! + +Layer name to include. + +
excludeLayer
name String! + +Layer name to exclude. + +
rolling
window WindowDuration! + +Width of each window. Pass either `{epoch: <millis>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: "1 day"}` or `{duration: "2 hours and 30 minutes"}`). + +
step WindowDuration + +Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <millis>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + +
expanding
step WindowDuration! + +How much the window grows by on each step. Pass either `{epoch: <millis>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: "1 day"}`). + +
alignmentUnit AlignmentUnit + +Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + +
window
start TimeInput! + +Inclusive lower bound. + +
end TimeInput! + +Exclusive upper bound. + +
at
time TimeInput! + +Instant to pin the view to. + +
snapshotLatest
time TimeInput! + +Instant at which entities must be valid. + +
latest
time TimeInput! + +Exclusive upper bound. + +
after
time TimeInput! + +Exclusive lower bound. + +
shrinkWindow
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
shrinkStart
start TimeInput! + +Proposed new start (TimeInput); ignored if it would widen the window. + +
shrinkEnd
end TimeInput! + +Proposed new end (TimeInput); ignored if it would widen the window. + +
typeFilter PathFromNode! -Filter nodes by type. +Narrow this path to neighbours whose node type is in the given set.
nodeTypes [String!]! + +Node types to keep. + +
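E.g. keeping only `person` neighbours one hop out (the `graph`/`node` root fields are assumed, as before): ```text query {
  graph(path: "some/graph") {   # root field assumed
    node(name: "alice") {
      neighbours {
        typeFilter(nodeTypes: ["person"]) { list { name } }
      }
    }
  }
}
```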
start
count Int! + +Number of neighbour nodes reachable from the source in this view. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [Node!]! + +Materialise every neighbour node in the path. Rejected by the server when +bulk list endpoints are disabled; use `page` for paginated access instead. + +
ids [String!]! -Returns the node ids. +Every neighbour node's id (name) as a flat list of strings. Rejected by the +server when bulk list endpoints are disabled.
views [PathFromNodeViewCollection!]! + +Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + +
filter PathFromNode! -Returns a filtered view that applies to list down the chain +Narrow the neighbour set to nodes matching `expr`. The filter sticks to +the returned path — every subsequent traversal (further hops, edges, +properties) continues to see the filtered scope. + +Useful when you want one scoping rule to apply across the whole query. +E.g. restricting the whole traversal to a specific week: + +```text +node(name: "A") { neighbours { filter(expr: {window: {...week...}}) { +list { neighbours { list { name } } } # further hops still windowed +} } } +``` + +Contrast with `select`, which applies here and is not carried through.
expr NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
select PathFromNode! -Returns filtered list of neighbour nodes +Narrow the neighbour set to nodes matching `expr`, but only at this hop +— further traversals out of these nodes see the unfiltered graph again. + +Useful when each hop needs a different scope. E.g. neighbours active on +Monday, then *their* neighbours active on Tuesday: + +```text +node(name: "A") { neighbours { select(expr: {window: {...monday...}}) { +list { neighbours { select(expr: {window: {...tuesday...}}) { +list { name } +} } } +} } } +``` + +Contrast with `filter`, which persists the scope through subsequent ops.
expr NodeFilter! + +Composite node filter (by name, property, type, etc.). + +
### PathFromNodeWindowSet +A lazy sequence of per-window neighbour sets, produced by +`neighbours.rolling` / `neighbours.expanding` (or the in/out variants). +Each entry is a `PathFromNode` scoped to that window. + @@ -5598,7 +7281,12 @@ Returns filtered list of neighbour nodes - + @@ -5616,28 +7304,49 @@ will be returned. - + - + - + - +
count Int! + +Number of windows in this set. Materialising all windows is expensive for +large graphs — prefer `page` over `list` when iterating. + +
page
limit Int! + +Maximum number of items to return on this page. + +
offset Int + +Extra items to skip on top of `pageIndex` paging (default 0). + +
pageIndex Int + +Zero-based page number; multiplies `limit` to determine where to start (default 0). + +
list [PathFromNode!]! + +Materialise every window as a list. Rejected by the server when bulk list +endpoints are disabled; use `page` for paginated access instead. + +
### Properties +All temporal properties of an entity (metadata is exposed separately). +Look up individual properties via `get` / `contains`, enumerate via +`keys` / `values`, or drop into `temporal` for time-aware accessors. + @@ -5653,35 +7362,45 @@ will be returned. - + - + @@ -5690,25 +7409,38 @@ Return all property keys. - + - +
Property -Get property value matching the specified key. +Look up a single property by key. Returns null if no property with that key +exists in the current view.
key String! + +The property name. + +
contains Boolean! -Check if the key is in the properties. +Returns true if a property with the given key exists in this view.
key String! + +The property name to look up. + +
keys [String!]! -Return all property keys. +All property keys present in the current view. Does not include metadata +— metadata is exposed separately via the entity's `metadata` field.
[Property!]! -Return all property values. +Snapshot of property values, one `{key, value}` entry per property.
keys [String!] + +Optional whitelist. If provided, only properties with these keys are returned; if omitted or null, every property in the view is returned. + +
temporal TemporalProperties! + +The temporal-only view of these properties — excludes metadata (which has no +history) and lets you drill into per-key timelines and aggregates. + +
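Putting the accessors together on one node (root fields assumed for illustration; the selected subfields come from the `Property` and `TemporalProperty` types below): ```text query {
  graph(path: "some/graph") {   # root field assumed
    node(name: "alice") {
      properties {
        keys
        contains(key: "email")
        get(key: "age") { value asString }
        temporal { get(key: "age") { latest } }
      }
    }
  }
}
```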
### Property +A single `(key, value)` property reading at a point in the graph view. +The value is exposed both as a typed scalar (`value`) and as a +human-readable string (`asString`). + @@ -5722,17 +7454,32 @@ Return all property values. - + - + - +
key String! + +The property key (name). + +
asString String! + +The property value rendered as a human-readable string (e.g. `"10"`, `"hello"`, +`"2024-01-01T00:00:00Z"`). For programmatic access use `value`, which returns +a typed scalar. + +
value PropertyOutput! + +The property value as a typed `PropertyOutput` scalar — numbers come back as +numbers, booleans as booleans, strings as strings, etc. + +
@@ -5769,6 +7516,10 @@ Return all property values. ### PropertyTuple +A `(time, value)` pair — the output type of temporal-property accessors +that need to report *when* a value was observed (e.g. `min`, `max`, +`median`, `orderedDedupe`). + @@ -5782,17 +7533,31 @@ Return all property values. - + - + - +
time EventTime! + +The timestamp at which this value was recorded. + +
asString String! + +The value rendered as a human-readable string. For programmatic access use +`value`, which returns a typed scalar. + +
value PropertyOutput! + +The value as a typed `PropertyOutput` scalar — numbers come back as numbers, +booleans as booleans, etc. + +
@@ -5844,6 +7609,10 @@ Return all property values. ### TemporalProperties +The temporal-only view of an entity's properties. Each entry is a +`TemporalProperty` carrying the full timeline for that key — use this when +you need per-update iteration, time-indexed lookups, or aggregates. + @@ -5859,35 +7628,44 @@ Return all property values. - + - + @@ -5896,20 +7674,30 @@ Return all property keys. - +
TemporalProperty -Get property value matching the specified key. +Look up a single temporal property by key. Returns null if there's no temporal +property with that key.
key String! + +The property name. + +
contains Boolean! -Check if the key is in the properties. +Returns true if a temporal property with the given key exists.
key String! + +The property name to look up. + +
keys [String!]! -Return all property keys. +All temporal-property keys present in this view.
[TemporalProperty!]! -Return all property values. +All temporal properties, each as a `TemporalProperty` with its full timeline +available. Use `history`, `values`, `latest`, `at`, etc. on each entry.
keys [String!] + +Optional whitelist. If provided, only temporal properties with these keys are returned; if omitted, every temporal property in the view is returned. + +
### TemporalProperty +The full timeline of a single property key on one entity. Exposes every +update (via `values` / `history` / `orderedDedupe`), point lookups (`at`, +`latest`), and aggregates over the timeline (`sum`, `mean`, `min`, `max`, +`median`, `count`). + @@ -5925,59 +7713,165 @@ Return all property values. - + - + - - + + - + - - + + - - + + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
String! -Key of a property. +The property key (name).
history History! + +Event history for this property — one entry per temporal update, in +insertion order. Use this to navigate the full timeline: access the +raw `timestamps` / `datetimes` / `eventId` lists, analyse gaps between +updates via `intervals` (mean/median/min/max), ask `isEmpty`, or +paginate the events. + +
values[String!]![PropertyOutput!]! -Return the values of the properties. +All values this property has ever taken, in temporal order (one per update). +Typed as `PropertyOutput` so numeric values stay numeric.
atStringPropertyOutput + +The value at or before time `t` (latest update on or before `t`). Returns null +if no update exists on or before `t`. + +
t TimeInput! + +A TimeInput (epoch millis integer, RFC3339 string, or `{timestamp, eventId}` object). + +
latestStringPropertyOutput + +The most recent value, or null if the property has never been set in this view. + +
unique[String!]![PropertyOutput!]! + +The set of distinct values this property has ever taken (order not guaranteed). + +
orderedDedupe [PropertyTuple!]! + +Collapses each run of consecutive equal updates into a single `(time, value)` pair. + +
latestTime Boolean! + +If true, each run is represented by its *last* timestamp; if false, by its *first*. Useful for compressing chatter in a timeline. + +
sumPropertyOutput + +Sum of all updates. Returns null if the dtype is not additive or the property is empty. + +
meanPropertyOutput + +Mean of all updates as an F64. Returns null if any value is non-numeric or the property is +empty. + +
averagePropertyOutput + +Alias for `mean` — same F64 average, same null cases. + +
minPropertyTuple + +Minimum `(time, value)` pair. Returns null if the dtype is not comparable or the property is +empty. + +
maxPropertyTuple + +Maximum `(time, value)` pair. Returns null if the dtype is not comparable or the property is +empty. + +
medianPropertyTuple + +Median `(time, value)` pair (lower median on even-length inputs). Returns null if the dtype +is not comparable or the property is empty. + +
countInt! + +Number of updates recorded for this property in the current view. + +
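A sketch of the timeline accessors and aggregates on a single key (root fields assumed, everything else from the signatures above): ```text query {
  graph(path: "some/graph") {   # root field assumed
    node(name: "alice") {
      properties { temporal { get(key: "score") {
        values                  # every update, in temporal order
        latest
        mean
        max { time value }      # PropertyTuple: when and what
        orderedDedupe(latestTime: true) { time value }
      } } }
    }
  }
}
```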
### VectorSelection +A working set of documents / nodes / edges built up via similarity +searches on a `VectorisedGraph`. Selections are mutable: you can grow +them with more hops (`expand*`), dereference the contents (`nodes`, +`edges`, `getDocuments`), or start fresh with `emptySelection`. + @@ -6020,131 +7914,191 @@ Returns a list of documents in the current selection. - - + + - + - + - + - + - + - + - + - + - + - + - + - +
VectorSelection! -Adds all the documents associated with the specified nodes to the current selection. - -Documents added by this call are assumed to have a score of 0. +Add every document associated with the named nodes to the selection. +Documents added this way receive a score of 0 (no similarity ranking).
nodes[String!]![NodeId!]! + +Node ids whose documents to include. + +
addEdges VectorSelection! -Adds all the documents associated with the specified edges to the current selection. - -Documents added by this call are assumed to have a score of 0. +Add every document associated with the named edges to the selection. +Documents added this way receive a score of 0 (no similarity ranking).
edges [InputEdge!]! + +List of `{src, dst}` pairs identifying the edges. + +
expand VectorSelection! -Add all the documents a specified number of hops away to the selection. - -Two documents A and B are considered to be 1 hop away of each other if they are on the same entity or if they are on the same node and edge pair. +Grow the selection by including documents that are within `hops` of any +document already in the selection. Two documents are 1 hop apart if +they're on the same entity or on a connected node/edge pair.
hops Int! + +Number of expansion rounds (1 = direct neighbours). + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict expansion to entities active in that interval. + +
expandEntitiesBySimilarity VectorSelection! -Adds documents, from the set of one hop neighbours to the current selection, to the selection based on their similarity score with the specified query. This function loops so that the set of one hop neighbours expands on each loop and number of documents added is determined by the specified limit. +Iteratively expand the selection by similarity to a natural-language +query. Each pass takes the one-hop neighbour set of the current +selection and adds the highest-scoring entities (mixed nodes and +edges); the loop continues until `limit` entities have been added.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Total number of entities to add across all passes. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to entities active in that interval. + +
expandNodesBySimilarity VectorSelection! -Add the adjacent nodes with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to nodes. +Like `expandEntitiesBySimilarity` but restricted to nodes — iteratively +add the highest-scoring adjacent nodes to the selection.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Total number of nodes to add across all passes. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to nodes active in that interval. + +
expandEdgesBySimilarity VectorSelection! -Add the adjacent edges with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to edges. +Like `expandEntitiesBySimilarity` but restricted to edges — iteratively +add the highest-scoring adjacent edges to the selection.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Total number of edges to add across all passes. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to edges active in that interval. + +
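A sketch of growing a selection; the `vectorisedGraph` root field is an assumption, as is the idea that the `nodes` dereference returns plain `Node` objects: ```text query {
  vectorisedGraph(path: "some/graph") {           # root field assumed
    nodesBySimilarity(query: "suspicious logins", limit: 5) {
      expand(hops: 1) {                           # pull in 1-hop documents (score 0)
        nodes { name }                            # dereference the selection
      }
    }
  }
}
```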
### VectorisedGraph +A graph with embedded vector representations for its nodes and edges. +Exposes similarity search over documents, nodes, and edges, plus +selection building (`emptySelection`) and index maintenance +(`optimizeIndex`). + @@ -6160,7 +8114,9 @@ Add the adjacent edges with higher score for query to the selection up to a spec @@ -6178,72 +8134,114 @@ Returns an empty selection of documents. - + - + - + - + - + - + - + - + - +
Boolean! -Optmize the vector index +Rebuild (or incrementally update) the on-disk vector indexes for nodes +and edges so subsequent similarity searches hit the fresh embeddings. +Safe to call repeatedly; returns true on success.
VectorSelection! -Search the top scoring entities according to a specified query returning no more than a specified limit of entities. +Find the highest-scoring nodes *and* edges (mixed) by similarity to a +natural-language query. The query is embedded server-side and matched +against indexed entity vectors.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Maximum number of results to return. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to entities active in that interval. + +
nodesBySimilarity VectorSelection! -Search the top scoring nodes according to a specified query returning no more than a specified limit of nodes. +Find the highest-scoring nodes by similarity to a natural-language +query. The query is embedded server-side and matched against indexed +node vectors.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Maximum number of nodes to return. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to nodes active in that interval. + +
edgesBySimilarity VectorSelection! -Search the top scoring edges according to a specified query returning no more than a specified limit of edges. +Find the highest-scoring edges by similarity to a natural-language +query. The query is embedded server-side and matched against indexed +edge vectors.
query String! + +Natural-language search string; embedded by the server. + +
limit Int! + +Maximum number of edges to return. + +
window VectorisedGraphWindow + +Optional `{start, end}` to restrict matches to edges active in that interval. + +
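And a windowed mixed search as a sketch (same `vectorisedGraph` and `nodes` assumptions as above; `edgesBySimilarity` and `nodesBySimilarity` take the same arguments): ```text query {
  vectorisedGraph(path: "some/graph") {   # root field assumed
    entitiesBySimilarity(
      query: "payment fraud",
      limit: 10,
      window: {start: 0, end: 1000}       # only entities active in this interval
    ) {
      nodes { name }
    }
  }
}
```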
@@ -6263,19 +8261,19 @@ Search the top scoring edges according to a specified query returning no more th src -String! +NodeId! -Source node. +Source node id (string or non-negative integer). dst -String! +NodeId! -Destination node. +Destination node id (string or non-negative integer). @@ -7356,7 +9354,7 @@ Single excluded layer. subgraph -[String!] +[NodeId!] Subgraph nodes. @@ -7374,7 +9372,7 @@ Subgraph node types. excludeNodes -[String!] +[NodeId!] List of excluded nodes. @@ -7599,19 +9597,19 @@ Edge properties. src -String! +NodeId! -Source node. +Source node id (string or non-negative integer). dst -String! +NodeId! -Destination node. +Destination node id (string or non-negative integer). @@ -7631,10 +9629,10 @@ Destination node. name -String! +NodeId! -Name. +Node id (string or non-negative integer). @@ -7663,6 +9661,15 @@ Metadata. Updates. + + + +layer +String + + +Layer. + @@ -9185,10 +11192,11 @@ A custom template. time -Int! +TimeInput! -Time. +Time of the update — accepts the same forms as `TimeInput` (epoch +millis Int, RFC3339 string, or `{timestamp, eventId}` object). @@ -9321,6 +11329,33 @@ List. Object. + + + +dtime +String + + +Timezone-aware datetime. + + + + +ndtime +String + + +Naive datetime (no timezone). + + + + +decimal +String + + +BigDecimal number (string representation, e.g. "3.14159" or "123e-5"). + @@ -9339,19 +11374,19 @@ Object. start -Int! +TimeInput! -Start time. +Inclusive lower bound of the search window. end -Int! +TimeInput! -End time. +Exclusive upper bound of the search window. @@ -9629,6 +11664,12 @@ The `Float` scalar type represents signed double-precision fractional values as The `Int` scalar type represents non-fractional signed whole numeric values. Int can represent values between -(2^31) and 2^31 - 1. +### NodeId + +Identifier for a node — either a string (`"alice"`) or a non-negative +integer (`42`). Use whichever form matches how the graph was indexed +when nodes were added. + ### PropertyOutput ### String @@ -9642,6 +11683,11 @@ where the timestamp is either an Int or a DateTime formatted String, and eventId Valid string formats are RFC3339, RFC2822, %Y-%m-%d, %Y-%m-%dT%H:%M:%S%.3f, %Y-%m-%dT%H:%M:%S%, %Y-%m-%d %H:%M:%S%.3f and %Y-%m-%d %H:%M:%S%. +Internally wraps `InputTime` so write paths (`addNode`, `addEdge`, +`addProperties`, etc.) can preserve auto-increment of `event_id` when only +a timestamp is given. Pass the object form `{timestamp, eventId}` to lock +the event_id explicitly. + ### Upload A multipart file upload @@ -9692,11 +11738,25 @@ Raphtory graph edge. Namespace - + + +A directory-like container for graphs and nested namespaces. Graphs are +addressed by path (e.g. `"team/project/graph"`), and every segment except +the last is a namespace. Use to browse what's stored on the server without +loading any graph data. + + MetaGraph - + + +Lightweight summary of a stored graph — its name, path, counts, and +filesystem timestamps — served without deserializing the full graph. +Useful for listing what's available on the server before committing to a +full load. 
+ + diff --git a/docs/src/rust/home/example.rs b/docs/src/rust/home/example.rs deleted file mode 100644 index 0b3603df73..0000000000 --- a/docs/src/rust/home/example.rs +++ /dev/null @@ -1,8 +0,0 @@ -fn main() -> Result<(), Box>{ - - // --8<-- [start:example] - println!("RUST stub") - // --8<-- [end:example] - - Ok(()) -} diff --git a/examples/custom-gql-apis/Cargo.toml b/examples/custom-gql-apis/Cargo.toml index 4074e14ab0..fa19368e0d 100644 --- a/examples/custom-gql-apis/Cargo.toml +++ b/examples/custom-gql-apis/Cargo.toml @@ -4,7 +4,7 @@ description = "Python package for raphtory, a temporal graph library" version = "0.9.3" keywords = ["graph", "temporal-graph", "temporal", "jira"] authors = ["Pometry"] -rust-version = "1.75.0" +rust-version = "1.89.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html[lib] diff --git a/examples/custom-gql-apis/src/mutation.rs b/examples/custom-gql-apis/src/mutation.rs index e48e421a7f..a836f400b9 100644 --- a/examples/custom-gql-apis/src/mutation.rs +++ b/examples/custom-gql-apis/src/mutation.rs @@ -20,7 +20,7 @@ impl<'a> Operation<'a, MutationPlugin> for HelloMutation { fn apply<'b>( _entry_point: &MutationPlugin, - ctx: ResolverContext, + ctx: ResolverContext<'b>, ) -> BoxFuture<'b, FieldResult>>> { let name = ctx .args diff --git a/examples/custom-gql-apis/src/query.rs b/examples/custom-gql-apis/src/query.rs index 453d127f61..7c5e226346 100644 --- a/examples/custom-gql-apis/src/query.rs +++ b/examples/custom-gql-apis/src/query.rs @@ -20,7 +20,7 @@ impl<'a> Operation<'a, QueryPlugin> for HelloQuery { fn apply<'b>( _entry_point: &QueryPlugin, - ctx: ResolverContext, + ctx: ResolverContext<'b>, ) -> BoxFuture<'b, FieldResult>>> { let name = ctx .args diff --git a/examples/netflow/src/netflow_one_path_node.rs b/examples/netflow/src/netflow_one_path_node.rs index c6567436c6..95c3393e95 100644 --- a/examples/netflow/src/netflow_one_path_node.rs +++ b/examples/netflow/src/netflow_one_path_node.rs @@ -122,7 +122,7 @@ pub fn netflow_one_path_node( vec![], vec![Job::new(step1)], None, - |egs, _, _, _| egs.finalize(&total_value), + |egs, _, _, _, _| egs.finalize(&total_value), threads, 1, None, diff --git a/examples/python/enron/nx.html b/examples/python/enron/nx.html index d4d35188ac..8ef2dbb6cb 100644 --- a/examples/python/enron/nx.html +++ b/examples/python/enron/nx.html @@ -1,155 +1,272 @@ - - - - - - - -
- - - - \ No newline at end of file + diff --git a/examples/rust/Cargo.toml b/examples/rust/Cargo.toml index f4a7622bf7..09d8c27d8b 100644 --- a/examples/rust/Cargo.toml +++ b/examples/rust/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["graph", "temporal-graph", "temporal", "examples"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -raphtory = { workspace = true, features = ["io", "proto"] } +raphtory = { workspace = true, features = ["io"] } chrono = { workspace = true } regex = { workspace = true } serde = { workspace = true } diff --git a/examples/rust/src/bin/bench/main.rs b/examples/rust/src/bin/bench/main.rs index ad63c92313..610098076f 100644 --- a/examples/rust/src/bin/bench/main.rs +++ b/examples/rust/src/bin/bench/main.rs @@ -58,10 +58,10 @@ fn main() { CsvLoader::new(data_dir) .set_delimiter("\t") .load_into_graph(&g, |lotr: Benchr, g: &Graph| { - g.add_node(1, lotr.src_id.clone(), NO_PROPS, None) + g.add_node(1, lotr.src_id.clone(), NO_PROPS, None, None) .expect("Failed to add node"); - g.add_node(1, lotr.dst_id.clone(), NO_PROPS, None) + g.add_node(1, lotr.dst_id.clone(), NO_PROPS, None, None) .expect("Failed to add node"); g.add_edge(1, lotr.src_id.clone(), lotr.dst_id.clone(), NO_PROPS, None) diff --git a/examples/rust/src/bin/lotr/main.rs b/examples/rust/src/bin/lotr/main.rs index 763c828e2b..ba17548c18 100644 --- a/examples/rust/src/bin/lotr/main.rs +++ b/examples/rust/src/bin/lotr/main.rs @@ -61,6 +61,7 @@ fn main() { lotr.src_id.clone(), [("type", Prop::str("Character"))], None, + None, ) .expect("Failed to add node"); @@ -69,6 +70,7 @@ fn main() { lotr.dst_id.clone(), [("type", Prop::str("Character"))], None, + None, ) .expect("Failed to add node"); diff --git a/fast_merge/src/lib.rs b/fast_merge/src/lib.rs new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graphql-bench/.gitignore b/graphql-bench/.gitignore index 074e8e7140..93b8db964b 100644 --- a/graphql-bench/.gitignore +++ b/graphql-bench/.gitignore @@ -11,11 +11,8 @@ reports terraform.tfstate terraform.tfstate.backup .virtual_documents -data/apache node_modules dist output.csv.gz -output.json +data -!data/apache/master/graph.tar.xz -!data/apache/master/.raph diff --git a/graphql-bench/Makefile b/graphql-bench/Makefile index ba93a7abfc..86ae38e930 100644 --- a/graphql-bench/Makefile +++ b/graphql-bench/Makefile @@ -4,15 +4,12 @@ CURRENT_TIME := $(shell date +"%Y-%m-%dT%H-%M-%S") K6_IP=$(shell terraform output k6_ip | jq -r '.') RAPHTORY_IP=$(shell terraform output raphtory_ip | jq -r '.') -data/apache/master/graph: - @echo "Unzipping apache master graph" - @cd data/apache/master && tar -Jxf graph.tar.xz -C . 
build: pnpm install --frozen-lockfile pnpm build -bench-local: data/apache/master/graph build +bench-local: build pnpm concurrently --raw --kill-others --names 'raphtory,bench' 'python server.py' 'sleep 10 && k6 run --out csv=output.csv.gz dist/bench.js' || : python process-k6-output.py diff --git a/graphql-bench/data/apache/master/.raph b/graphql-bench/data/apache/master/.raph index 45297e31c6..e157c0d4f4 100644 --- a/graphql-bench/data/apache/master/.raph +++ b/graphql-bench/data/apache/master/.raph @@ -1 +1 @@ -{"node_count":73369,"edge_count":54654,"metadata":[["hidden",{"Bool":true}]]} \ No newline at end of file +{"path":"data0"} \ No newline at end of file diff --git a/graphql-bench/data/apache/master/data0/.meta b/graphql-bench/data/apache/master/data0/.meta new file mode 100644 index 0000000000..61cf45fc9b --- /dev/null +++ b/graphql-bench/data/apache/master/data0/.meta @@ -0,0 +1 @@ +{"path":"graph0","meta":{"node_count":52151,"edge_count":44045,"graph_type":"EventGraph","is_diskgraph":false}} diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000000.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000000.parquet new file mode 100644 index 0000000000..2d382e300a Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000000.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000001.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000001.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000001.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000002.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000002.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000002.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000003.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000003.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000003.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000004.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000004.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000004.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000005.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000005.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000005.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000006.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000006.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000006.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000007.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000007.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000007.parquet differ diff --git 
a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000008.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000008.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000008.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000009.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000009.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000009.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000010.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000010.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000010.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000011.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000011.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000011.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000012.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000012.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000012.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000013.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000013.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000013.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000014.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000014.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000014.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000015.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000015.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000015.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000016.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000016.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000016.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000017.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000017.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000017.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000018.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000018.parquet new file mode 100644 index 0000000000..a2babe03cf Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000018.parquet differ diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000019.parquet 
b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000019.parquet
new file mode 100644
index 0000000000..a2babe03cf
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000019.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000020.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000020.parquet
new file mode 100644
index 0000000000..a2babe03cf
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000020.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000021.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000021.parquet
new file mode 100644
index 0000000000..a2babe03cf
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000021.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000022.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000022.parquet
new file mode 100644
index 0000000000..a2babe03cf
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000022.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_c/00000023.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000023.parquet
new file mode 100644
index 0000000000..a2babe03cf
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_c/00000023.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000000.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000000.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000000.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000001.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000001.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000001.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000002.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000002.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000002.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000003.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000003.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000003.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000004.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000004.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000004.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000005.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000005.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000005.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000006.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000006.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000006.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000007.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000007.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000007.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000008.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000008.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000008.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000009.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000009.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000009.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000010.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000010.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000010.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000011.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000011.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000011.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000012.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000012.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000012.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000013.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000013.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000013.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000014.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000014.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000014.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000015.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000015.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000015.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000016.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000016.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000016.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000017.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000017.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000017.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000018.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000018.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000018.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000019.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000019.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000019.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000020.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000020.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000020.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000021.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000021.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000021.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000022.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000022.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000022.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_d/00000023.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000023.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_d/00000023.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000000.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000000.parquet
new file mode 100644
index 0000000000..92711bdaa2
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000000.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000001.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000001.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000001.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000002.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000002.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000002.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000003.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000003.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000003.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000004.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000004.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000004.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000005.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000005.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000005.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000006.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000006.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000006.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000007.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000007.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000007.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000008.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000008.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000008.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000009.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000009.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000009.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000010.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000010.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000010.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000011.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000011.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000011.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000012.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000012.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000012.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000013.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000013.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000013.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000014.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000014.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000014.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000015.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000015.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000015.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000016.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000016.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000016.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000017.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000017.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000017.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000018.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000018.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000018.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000019.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000019.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000019.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000020.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000020.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000020.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000021.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000021.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000021.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000022.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000022.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000022.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/edges_t/00000023.parquet b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000023.parquet
new file mode 100644
index 0000000000..7409dad9ce
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/edges_t/00000023.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/graph_c/0.parquet b/graphql-bench/data/apache/master/data0/graph0/graph_c/0.parquet
new file mode 100644
index 0000000000..f26e4b8504
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/graph_c/0.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/graph_t/0.parquet b/graphql-bench/data/apache/master/data0/graph0/graph_t/0.parquet
new file mode 100644
index 0000000000..29ecebc21b
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/graph_t/0.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000000.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000000.parquet
new file mode 100644
index 0000000000..ef2d83af38
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000000.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000001.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000001.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000001.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000002.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000002.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000002.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000003.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000003.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000003.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000004.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000004.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000004.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000005.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000005.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000005.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000006.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000006.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000006.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000007.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000007.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000007.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000008.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000008.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000008.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000009.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000009.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000009.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000010.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000010.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000010.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000011.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000011.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000011.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000012.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000012.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000012.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000013.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000013.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000013.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000014.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000014.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000014.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000015.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000015.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000015.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000016.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000016.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000016.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000017.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000017.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000017.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000018.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000018.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000018.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000019.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000019.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000019.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000020.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000020.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000020.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000021.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000021.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000021.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000022.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000022.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000022.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000023.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000023.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000023.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000024.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000024.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000024.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000025.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000025.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000025.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000026.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000026.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000026.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000027.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000027.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000027.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000028.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000028.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000028.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000029.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000029.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000029.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000030.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000030.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000030.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000031.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000031.parquet
new file mode 100644
index 0000000000..12820f0bbb
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_c/00000031.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000000.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000000.parquet
new file mode 100644
index 0000000000..898adf758d
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000000.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000001.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000001.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000001.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000002.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000002.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000002.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000003.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000003.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000003.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000004.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000004.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000004.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000005.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000005.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000005.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000006.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000006.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000006.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000007.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000007.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000007.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000008.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000008.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000008.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000009.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000009.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000009.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000010.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000010.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000010.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000011.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000011.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000011.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000012.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000012.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000012.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000013.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000013.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000013.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000014.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000014.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000014.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000015.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000015.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000015.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000016.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000016.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000016.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000017.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000017.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000017.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000018.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000018.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000018.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000019.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000019.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000019.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000020.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000020.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000020.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000021.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000021.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000021.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000022.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000022.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000022.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000023.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000023.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000023.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000024.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000024.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000024.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000025.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000025.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000025.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000026.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000026.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000026.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000027.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000027.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000027.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000028.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000028.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000028.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000029.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000029.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000029.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000030.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000030.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000030.parquet differ
diff --git a/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000031.parquet b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000031.parquet
new file mode 100644
index 0000000000..e6c478f367
Binary files /dev/null and b/graphql-bench/data/apache/master/data0/graph0/nodes_t/00000031.parquet differ
diff --git a/graphql-bench/data/apache/master/graph.tar.xz b/graphql-bench/data/apache/master/graph.tar.xz
deleted file mode 100644
index 8cd640967c..0000000000
Binary files a/graphql-bench/data/apache/master/graph.tar.xz and /dev/null differ
diff --git a/graphql-bench/package.json b/graphql-bench/package.json
index 27de176967..f1b4ebbb85 100644
--- a/graphql-bench/package.json
+++ b/graphql-bench/package.json
@@ -1,35 +1,35 @@
 {
-    "name": "bench",
-    "private": true,
-    "version": "0.0.0",
-    "type": "module",
-    "module": "./dist/index.js",
-    "files": [
-        "dist"
-    ],
-    "scripts": {
-        "prebuild": "genql --schema ../raphtory-graphql/schema.graphql --output src/__generated",
-        "build": "tsc --noEmit && tsup",
-        "bench": "pnpm build && K6_WEB_DASHBOARD=true K6_WEB_DASHBOARD_EXPORT=report.html k6 run --out csv=output.csv.gz dist/bench.js"
-    },
-    "devDependencies": {
-        "@genql/cli": "^6.3.3",
-        "@types/k6": "^1.6.0",
-        "@types/node": "^25.3.5",
-        "concurrently": "^9.2.1",
-        "tsup": "8.5.1",
-        "typescript": "^5.9.3"
-    },
-    "dependencies": {
-        "@google-cloud/compute": "^6.8.0",
-        "dotenv": "^17.3.1",
-        "node-ssh": "^13.2.1",
-        "zod": "^4.3.6"
-    },
-    "pnpm": {
-        "overrides": {
-            "@tootallnate/once@<3.0.1": ">=3.0.1",
-            "undici@<6.23.0": ">=6.23.0"
-        }
+  "name": "bench",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "module": "./dist/index.js",
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "prebuild": "genql --schema ../raphtory-graphql/schema.graphql --output src/__generated",
+    "build": "tsc --noEmit && tsup",
+    "bench": "pnpm build && K6_WEB_DASHBOARD=true K6_WEB_DASHBOARD_EXPORT=report.html k6 run --out csv=output.csv.gz dist/bench.js"
+  },
"devDependencies": { + "@genql/cli": "^6.3.3", + "@types/k6": "^1.6.0", + "@types/node": "^25.3.5", + "concurrently": "^9.2.1", + "tsup": "8.5.1", + "typescript": "^5.9.3" + }, + "dependencies": { + "@google-cloud/compute": "^6.8.0", + "dotenv": "^17.3.1", + "node-ssh": "^13.2.1", + "zod": "^4.3.6" + }, + "pnpm": { + "overrides": { + "@tootallnate/once@<3.0.1": ">=3.0.1", + "undici@<6.23.0": ">=6.23.0" } + } } diff --git a/graphql-bench/src/bench.ts b/graphql-bench/src/bench.ts index 5b370114b1..6ba8b222aa 100644 --- a/graphql-bench/src/bench.ts +++ b/graphql-bench/src/bench.ts @@ -1,252 +1,251 @@ -import { check, fail, sleep } from 'k6'; -import http from 'k6/http'; -import { Rate } from 'k6/metrics'; +import { check, fail, sleep } from "k6"; +import http from "k6/http"; +import { Rate } from "k6/metrics"; -import { fetchAndCheck, fetchAndParse, mutate } from './utils'; +import { fetchAndCheck, fetchAndParse, mutate } from "./utils"; const TIME_RANGE = 2000 * 365 * 24 * 60 * 60 * 1000; const randomTime = () => Math.floor(Math.random() * TIME_RANGE); -export const errorRate = new Rate('errors'); +export const errorRate = new Rate("errors"); const duration = 1; const stagesInMinutes: { duration: number; target: number }[] = [ - { duration, target: 100 }, - { duration, target: 400 }, - { duration, target: 1600 }, - { duration, target: 6400 }, + { duration, target: 100 }, + { duration, target: 400 }, + { duration, target: 1600 }, + { duration, target: 6400 }, ]; // +1 to leave enough time for the server to recover from prev scenario const minutesPerScenario = - stagesInMinutes.map(({ duration }) => duration).reduce((a, b) => a + b) + 1; + stagesInMinutes.map(({ duration }) => duration).reduce((a, b) => a + b) + 1; const execs = [ - addNode, - randomNodePage, - randomEdgePage, - nodePropsByName, - nodeNeighboursByName, - readAndWriteNodeProperties, + addNode, + randomNodePage, + randomEdgePage, + nodePropsByName, + nodeNeighboursByName, + readAndWriteNodeProperties, ]; const scenarios = execs.map( - (exec, index) => - [ - exec.name, - { - executor: 'ramping-arrival-rate', - exec: exec.name, - startRate: 0, - startTime: `${index * minutesPerScenario}m`, - timeUnit: '1s', - preAllocatedVUs: 5, - maxVUs: 1000, - stages: stagesInMinutes.map(({ duration, target }) => ({ - duration: `${duration}m`, - target, - })), - }, - ] as const, + (exec, index) => + [ + exec.name, + { + executor: "ramping-arrival-rate", + exec: exec.name, + startRate: 0, + startTime: `${index * minutesPerScenario}m`, + timeUnit: "1s", + preAllocatedVUs: 5, + maxVUs: 1000, + stages: stagesInMinutes.map(({ duration, target }) => ({ + duration: `${duration}m`, + target, + })), + }, + ] as const, ); export const options = { - scenarios: Object.fromEntries(scenarios), + scenarios: Object.fromEntries(scenarios), }; type SetupData = { - graphPaths: string[]; - countNodes: number; - countEdges: number; + graphPaths: string[]; + countNodes: number; + countEdges: number; }; export function setup(): SetupData { - const graphListResponse = fetchAndParse({ - namespaces: { list: { graphs: { list: { path: true } } } }, - }); - const graphPaths = graphListResponse.data.namespaces.list.flatMap( - (ns: any) => ns.graphs.list.map((graph: any) => graph.path), - ); - - mutate({ - newGraph: { - __args: { - path: 'empty', - graphType: 'EVENT', - }, - }, - }); - - // this is to trigger the load of the empty graph into memory - fetchAndCheck(errorRate, { - graph: { - __args: { - path: 'empty', - }, - countNodes: true, - }, - }); - - const graphResponse 
= fetchAndParse({ - graph: { - __args: { - path: 'master', - }, - countNodes: true, - countEdges: true, - }, - }); - - return { - graphPaths, - countNodes: graphResponse.data.graph.countNodes, - countEdges: graphResponse.data.graph.countEdges, - }; + const graphListResponse = fetchAndParse({ + namespaces: { list: { graphs: { list: { path: true } } } }, + }); + const graphPaths = graphListResponse.data.namespaces.list.flatMap((ns: any) => + ns.graphs.list.map((graph: any) => graph.path), + ); + + mutate({ + newGraph: { + __args: { + path: "empty", + graphType: "EVENT", + }, + }, + }); + + // this is to trigger the load of the empty graph into memory + fetchAndCheck(errorRate, { + graph: { + __args: { + path: "empty", + }, + countNodes: true, + }, + }); + + const graphResponse = fetchAndParse({ + graph: { + __args: { + path: "master", + }, + countNodes: true, + countEdges: true, + }, + }); + + return { + graphPaths, + countNodes: graphResponse.data.graph.countNodes, + countEdges: graphResponse.data.graph.countEdges, + }; } - - export function addNode() { - const name = Math.random().toString(); - const time = randomTime(); - fetchAndCheck(errorRate, { - updateGraph: { - __args: { - path: 'empty', - }, - addNode: { - __args: { - name, - time, - }, - success: true, - }, + const name = Math.random().toString(); + const time = randomTime(); + fetchAndCheck(errorRate, { + updateGraph: { + __args: { + path: "empty", + }, + addNode: { + __args: { + name, + time, }, - }); + success: true, + }, + }, + }); } export function randomNodePage(input: SetupData) { - const offset = Math.floor(Math.random() * (input.countNodes - 20)); - fetchAndCheck(errorRate, { - graph: { - __args: { path: 'master' }, - nodes: { - page: { - __args: { offset, limit: 20 }, - degree: true, - name: true, - }, - }, + const offset = Math.floor(Math.random() * (input.countNodes - 20)); + fetchAndCheck(errorRate, { + graph: { + __args: { path: "master" }, + nodes: { + page: { + __args: { offset, limit: 20 }, + degree: true, + name: true, }, - }); + }, + }, + }); } export function randomEdgePage(input: SetupData) { const offset = Math.floor(Math.random() * (input.countEdges - 20)); - fetchAndCheck(errorRate, { - graph: { - __args: { path: 'master' }, - edges: { - page: { - __args: { offset, limit: 20 }, - explodeLayers: { - count: true, - }, - history: { - list: { - timestamp: true, - }, - }, - src: { name: true }, - dst: { name: true }, - }, + fetchAndCheck(errorRate, { + graph: { + __args: { path: "master" }, + edges: { + page: { + __args: { offset, limit: 20 }, + explodeLayers: { + count: true, + }, + history: { + list: { + timestamp: true, }, + }, + src: { name: true }, + dst: { name: true }, }, - }); + }, + }, + }); } export function nodePropsByName() { - fetchAndCheck(errorRate, { - graph: { - __args: { path: 'master' }, - node: { - __args: { - name: 'SPARK-22386', - }, - metadata: { - values: { - key: true, - value: true, - }, - }, - }, + fetchAndCheck(errorRate, { + graph: { + __args: { path: "master" }, + node: { + __args: { + name: "SPARK-22386", }, - }); + metadata: { + values: { + key: true, + value: true, + }, + }, + }, + }, + }); } export function nodeNeighboursByName() { - fetchAndCheck(errorRate, { - graph: { - __args: { path: 'master' }, - node: { - __args: { - name: 'SPARK-22386', - }, - neighbours: { - list: { - name: true, - }, - }, - }, + fetchAndCheck(errorRate, { + graph: { + __args: { path: "master" }, + node: { + __args: { + name: "SPARK-22386", }, - }); + neighbours: { + list: { + name: true, + }, + }, + }, 
+ }, + }); } - export function readAndWriteNodeProperties(input: SetupData) { - const random = Math.random(); - const time = randomTime(); - if (random < 0.3) { - fetchAndCheck(errorRate, { - updateGraph: { - __args: { - path: 'master', - }, - node: { + const random = Math.random(); + const time = randomTime(); + if (random < 0.3) { + fetchAndCheck(errorRate, { + updateGraph: { + __args: { + path: "master", + }, + node: { + __args: { + name: "SPARK-22386", + }, + addUpdates: { + __args: { + time, + properties: [ + { key: "temporal_bool", value: { bool: Math.random() > 0.5 } }, + ], + }, + }, + }, + }, + }); + } else { + fetchAndCheck(errorRate, { + graph: { + __args: { path: "master" }, + node: { + __args: { + name: "SPARK-22386", + }, + at: { + __args: { + time: { simpleTime: time }, + }, + properties: { + get: { __args: { - name: "SPARK-22386" + key: "temporal_bool", }, - addUpdates: { - __args: { - time, - properties: [{key: "temporal_bool", value: {bool: Math.random() > 0.5}}] - } - } - } - - }, - }); - } else { - fetchAndCheck(errorRate, { - graph: { - __args: { path: 'master' }, - node: { - __args: { - name: 'SPARK-22386', - }, - at: { - __args: { - time: { simpleTime: time }, - }, - properties: { - get: { - __args: { - key: "temporal_bool" - } - } - } - } + value: true, }, + }, }, - }); - } + }, + }, + }); + } } diff --git a/graphql-bench/src/utils.ts b/graphql-bench/src/utils.ts index 248ed3857b..3b98b20855 100644 --- a/graphql-bench/src/utils.ts +++ b/graphql-bench/src/utils.ts @@ -1,67 +1,77 @@ import http, { RefinedResponse } from "k6/http"; -import { generateMutationOp, generateQueryOp, MutRootGenqlSelection, QueryRootGenqlSelection } from "./__generated"; +import { + generateMutationOp, + generateQueryOp, + MutRootGenqlSelection, + QueryRootGenqlSelection, +} from "./__generated"; import { Rate } from "k6/metrics"; import { check, fail } from "k6"; -const URL = __ENV.RAPHTORY_URL ?? 'http://localhost:1736'; +const URL = __ENV.RAPHTORY_URL ?? 
"http://localhost:1736"; -function checkResponse(response: RefinedResponse, errorRate: Rate) { +function checkResponse( + response: RefinedResponse, + errorRate: Rate, +) { const result = check(response, { - 'response status is 200': (r) => r.status === 200, - 'response has data field defined': (r) => { - if (typeof r.body === 'string') { - const body = JSON.parse(r.body); - const result = 'data' in body && - body.data !== undefined && - body.data !== null; // FIXME: improve query checking, I wish I could just rely on genql + "response status is 200": (r) => r.status === 200, + "response has data field defined": (r) => { + if (typeof r.body === "string") { + const body = JSON.parse(r.body); + const result = + "data" in body && body.data !== undefined && body.data !== null; // FIXME: improve query checking, I wish I could just rely on genql - if (result === false) { - // console.log(">>> error:", JSON.stringify(body, null, 2)); - // console.log(">>> request:", JSON.stringify(response.request.body, null, 2)) - } + if (result === false) { + console.log(">>> error:", JSON.stringify(body, null, 2)); + console.log( + ">>> request:", + JSON.stringify(response.request.body, null, 2), + ); + } - return result; - } else { - return false; - } - }, + return result; + } else { + return false; + } + }, }); errorRate.add(!result); } const params = { - headers: { 'Content-Type': 'application/json', 'Accept-Encoding': 'gzip' }, + headers: { "Content-Type": "application/json", "Accept-Encoding": "gzip" }, }; function fetch(query: QueryRootGenqlSelection) { - const { query: compiledQuery, variables } = generateQueryOp(query); - const payload = JSON.stringify({ - query: compiledQuery, - variables: variables, - }); - return http.post(URL, payload, params); + const { query: compiledQuery, variables } = generateQueryOp(query); + const payload = JSON.stringify({ + query: compiledQuery, + variables: variables, + }); + return http.post(URL, payload, params); } export function mutate(query: MutRootGenqlSelection) { - const { query: compiledQuery, variables } = generateMutationOp(query); - const payload = JSON.stringify({ - query: compiledQuery, - variables: variables, - }); - return http.post(URL, payload, params); + const { query: compiledQuery, variables } = generateMutationOp(query); + const payload = JSON.stringify({ + query: compiledQuery, + variables: variables, + }); + return http.post(URL, payload, params); } export function fetchAndParse(query: QueryRootGenqlSelection) { - const response = fetch(query); - if (typeof response.body !== 'string') { - fail(JSON.stringify(response)); - } - return JSON.parse(response.body); + const response = fetch(query); + if (typeof response.body !== "string") { + fail(JSON.stringify(response)); + } + return JSON.parse(response.body); } -export function fetchAndCheck(errorRate: Rate, query: QueryRootGenqlSelection, ) { - checkResponse(fetch(query), errorRate); +export function fetchAndCheck(errorRate: Rate, query: QueryRootGenqlSelection) { + checkResponse(fetch(query), errorRate); } -export function mutateAndCheck(errorRate: Rate, query: MutRootGenqlSelection, ) { - checkResponse(mutate(query), errorRate); +export function mutateAndCheck(errorRate: Rate, query: MutRootGenqlSelection) { + checkResponse(mutate(query), errorRate); } diff --git a/optd b/optd new file mode 160000 index 0000000000..c8a444e100 --- /dev/null +++ b/optd @@ -0,0 +1 @@ +Subproject commit c8a444e1000ecd3b3f366f52a970b6495ec6a591 diff --git a/pometry-storage-private b/pometry-storage-private deleted file 
mode 160000 index f28bd721ea..0000000000 --- a/pometry-storage-private +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f28bd721ea91a59f80f08af3d760a33725eca481 diff --git a/pometry-storage/src/lib.rs b/pometry-storage/src/lib.rs deleted file mode 100644 index 0851e257e4..0000000000 --- a/pometry-storage/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -#[cfg(feature = "storage")] -compile_error!("The 'storage' feature is private"); diff --git a/python/.github/workflows/CI.yml b/python/.github/workflows/CI.yml index b34789d3ca..e345c23a21 100644 --- a/python/.github/workflows/CI.yml +++ b/python/.github/workflows/CI.yml @@ -11,7 +11,10 @@ jobs: linux: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: messense/maturin-action@v1 with: manylinux: auto @@ -26,7 +29,10 @@ jobs: windows: runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: messense/maturin-action@v1 with: command: build @@ -40,7 +46,10 @@ jobs: macos: runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v5 + with: + submodules: recursive + token: '${{ secrets.PULL_TOKEN }}' - uses: messense/maturin-action@v1 with: command: build diff --git a/python/Cargo.toml b/python/Cargo.toml index 7f7c77b0b3..2744f7afdc 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -21,17 +21,23 @@ crate-type = ["cdylib"] pyo3 = { workspace = true } raphtory = { workspace = true, features = [ "python", - "search", "vectors", - "proto", ] } raphtory-graphql = { workspace = true, features = [ - "python", "search" + "python", ] } +auth = { workspace = true } +clam-core = { workspace = true, features = ["python"] } + [features] -storage = ["raphtory/storage", "raphtory-graphql/storage"] extension-module = ["pyo3/extension-module"] +search = ["raphtory/search", "raphtory-graphql/search"] +proto = ["raphtory/proto"] + [build-dependencies] pyo3-build-config = { workspace = true } + +#[target.'cfg(not(target_env = "msvc"))'.dependencies] +#tikv-jemallocator.workspace = true diff --git a/python/profile.json b/python/profile.json deleted file mode 100644 index 6b806a3cfc..0000000000 --- a/python/profile.json +++ /dev/null @@ -1 +0,0 @@ 
-{"meta":{"categories":[{"name":"Other","color":"grey","subcategories":["Other"]},{"name":"Regular","color":"blue","subcategories":["Other"]}],"debug":false,"extensions":{"baseURL":[],"id":[],"length":0,"name":[]},"interval":1.0,"preprocessedProfileVersion":44,"processType":0,"product":"python","sampleUnits":{"eventDelay":"ms","threadCPUDelta":"µs","time":"ms"},"startTime":1708001452023.772,"symbolicated":false,"pausedRanges":[],"version":24,"usesOnlyOneStackType":true,"doesNotUseFrameImplementation":true,"sourceCodeIsNotOnSearchfox":true,"markerSchema":[]},"libs":[{"name":"libdyld.dylib","path":"/usr/lib/system/libdyld.dylib","debugName":"libdyld.dylib","debugPath":"/usr/lib/system/libdyld.dylib","breakpadId":"11007DE4ABE13079BF1C4D0EADA884DD0","codeId":null,"arch":"x86_64"},{"name":"python","path":"/Users/bensteer/miniconda3/envs/raphtory/bin/python","debugName":"python","debugPath":"/Users/bensteer/miniconda3/envs/raphtory/bin/python","breakpadId":"6998F14EBF303B7B9B5424A3CF365B400","codeId":null,"arch":"x86_64"},{"name":"dyld","path":"/usr/lib/dyld","debugName":"dyld","debugPath":"/usr/lib/dyld","breakpadId":"57DB2053BFD5368397C6F1DB2A1F1D090","codeId":null,"arch":"x86_64"},{"name":"libsystem_c.dylib","path":"/usr/lib/system/libsystem_c.dylib","debugName":"libsystem_c.dylib","debugPath":"/usr/lib/system/libsystem_c.dylib","breakpadId":"337F746F82323439BBFFBAB9C10731F40","codeId":null,"arch":"x86_64"},{"name":"libsystem_notify.dylib","path":"/usr/lib/system/libsystem_notify.dylib","debugName":"libsystem_notify.dylib","debugPath":"/usr/lib/system/libsystem_notify.dylib","breakpadId":"1AE2715689B53E4AAB14A77E91F93DC70","codeId":null,"arch":"x86_64"},{"name":"libsystem_kernel.dylib","path":"/usr/lib/system/libsystem_kernel.dylib","debugName":"libsystem_kernel.dylib","debugPath":"/usr/lib/system/libsystem_kernel.dylib","breakpadId":"B51DEA59126035C6B7B6AA6D8459ECEC0","codeId":null,"arch":"x86_64"},{"name":"libsystem_platform.dylib","path":"/usr/lib/system/libsystem_platform.dylib","debugName":"libsystem_platform.dylib","debugPath":"/usr/lib/system/libsystem_platform.dylib","breakpadId":"6F368391D41B3A7E99926000FA985F210","codeId":null,"arch":"x86_64"}],"threads":[{"frameTable":{"length":154,"address":[-1,89916,8695,2512997,2513518,2306561,2350515,3960831,85029,85223,21834,160704,146794,146136,86202,117384,117246,143407,103226,2306876,2308365,3402529,520245,1331795,1138526,2297380,2312544,599776,2306969,2323558,2231968,1638361,1638972,1877642,2323606,2324390,487603,462427,473246,1819633,2185661,490769,491270,1887990,887326,1619066,1611214,462818,464257,1033828,1040005,2324485,2180328,485106,1612976,1916830,886643,2203864,902487,2915125,2916116,339206,331565,333132,4835,5675,4794,2323717,1402420,1403274,1960942,1949965,1909745,2552401,2608845,10182,2183685,1960845,471759,1037879,1052768,5360,2323907,2328104,1611149,1888404,1052686,941933,1750820,2687434,600192,488369,486420,2685192,487358,486703,1033881,2697385,40769,6994,1619279,2373863,2365030,2557744,172990,172784,10566,1869235,551413,2747814,2767391,2769490,489926,547947,2725698,2733827,546572,2699770,2502081,7374,1882321,1042232,1908337,2274430,2263903,2265312,2264916,1027880,2525824,942819,1042136,1065529,827894,1073718,1918701,884179,1064889,1039598,1659261,2508046,2334173,2524463,2519427,2334181,2339454,2522933,1037374,794164,631834,499788,1017564,2334280,2342089,2351585],"inlineDepth":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"category":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],"subcategory":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"func":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153],"nativeSymbol":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],"innerWindowID":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],"implementation":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null
[... remainder of a machine-generated Firefox Profiler (Gecko) trace of the benchmarked "python" process (pid 72023) elided through the end of the trace file: null-filled frameTable/funcTable columns, a 379-entry stackTable, 25 CPU samples spanning roughly 178-206 ms, and a string table of raw hex addresses ...]
","0x299742","0x29b703","0x8570c","0x2931fa","0x262dc1","0x1cce","0x1cb8d1","0xfe738","0x1d1e71","0x22b47e","0x228b5f","0x2290e0","0x228f54","0xfaf28","0x268a80","0xe62e3","0xfe6d8","0x104239","0xca1f6","0x106236","0x1d46ed","0xd7dd3","0x103fb9","0xfdcee","0x19517d","0x26450e","0x239ddd","0x26852f","0x267183","0x239de5","0x23b27e","0x267f35","0xfd43e","0xc1e34","0x9a41a","0x7a04c","0xf86dc","0x239e48","0x23bcc9","0x23e1e1"],"tid":512579,"unregisterTime":206.111497}],"pages":[],"profilerOverhead":[],"counters":[]} \ No newline at end of file diff --git a/python/python/raphtory/__init__.py b/python/python/raphtory/__init__.py index 51fa280a9a..2b26bda251 100644 --- a/python/python/raphtory/__init__.py +++ b/python/python/raphtory/__init__.py @@ -8,6 +8,8 @@ _sys.modules["raphtory.vectors"] = vectors _sys.modules["raphtory.graphql"] = graphql _sys.modules["raphtory.filter"] = filter +_sys.modules["raphtory.gql"] = gql +_sys.modules["raphtory.iterables"] = iterables __doc__ = _raphtory.__doc__ if hasattr(_raphtory, "__all__"): diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 1e1dec4822..6947ce3941 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -19,13 +19,17 @@ from raphtory.algorithms import * from raphtory.vectors import * from raphtory.node_state import * from raphtory.graphql import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * @@ -59,8 +63,6 @@ __all__ = [ "HistoryEventId", "Intervals", "WindowSet", - "IndexSpecBuilder", - "IndexSpec", "Prop", "version", "graphql", @@ -71,6 +73,7 @@ __all__ = [ "node_state", "filter", "iterables", + "gql", "nullmodels", "plottingutils", ] @@ -317,14 +320,6 @@ class GraphView(object): list[Node]: the nodes that match the properties name and value """ - def get_index_spec(self) -> IndexSpec: - """ - Get index spec - - Returns: - IndexSpec: - """ - def has_edge(self, src: NodeInput, dst: NodeInput) -> bool: """ Returns true if the graph contains the specified edge @@ -402,12 +397,24 @@ class GraphView(object): def materialize(self) -> GraphView: """ - Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph + Returns a 'materialized' clone of the graph view - i.e. a new graph with a + copy of the data seen within the view instead of just a mask over the original graph. Returns: GraphView: Returns a graph clone """ + def materialize_at(self, path: str | PathLike) -> GraphView: + """ + Materializes the graph view into a folder on disk. + + Arguments: + path (str | PathLike): destination folder for the materialised graph. + + Returns: + GraphView: the materialised graph at `path`. + """ + @property def metadata(self) -> Metadata: """ @@ -477,32 +484,6 @@ class GraphView(object): WindowSet: A `WindowSet` object. """ - def search_edges(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Edge]: - """ - Searches for edges which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. 
- limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Edge]: A list of edges which match the filter expression. The list will be empty if no edges match the query. - """ - - def search_nodes(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Node]: - """ - Searches for nodes which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. - limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Node]: A list of nodes which match the filter expression. The list will be empty if no nodes match. - """ - def shrink_end(self, end: TimeInput) -> GraphView: """ Set the end of the window to the smaller of `end` and `self.end()` @@ -691,7 +672,7 @@ class GraphView(object): def vectorise( self, - model, + model: VectorCache, nodes: bool | str = True, edges: bool | str = True, verbose: bool = False, @@ -700,10 +681,9 @@ class GraphView(object): Create a VectorisedGraph from the current graph. Args: - embedding (Callable[[list], list]): Specify the embedding function used to vectorise documents into embeddings. + model (VectorCache): Cache wrapping the embedding model used to embed documents. nodes (bool | str): Enable for nodes to be embedded, disable for nodes to not be embedded or specify a custom document property to use if a string is provided. Defaults to True. edges (bool | str): Enable for edges to be embedded, disable for edges to not be embedded or specify a custom document property to use if a string is provided. Defaults to True. - cache (str, optional): Path used to store the cache of embeddings. verbose (bool): Enable to print logs reporting progress. Defaults to False. Returns: @@ -736,10 +716,13 @@ class Graph(GraphView): A temporal graph with event semantics. Arguments: - num_shards (int, optional): The number of locks to use in the storage to allow for multithreaded updates. + path (str | PathLike, optional): The path for persisting the graph (only works with disk storage enabled) + config (Config, optional): The configuration options for the graph """ - def __new__(cls, num_shards: Optional[int] = None) -> Graph: + def __new__( + cls, path: Optional[str | PathLike] = None, config: Optional[Config] = None + ) -> Graph: """Create and return a new object. See help(type) for accurate signature.""" def __reduce__(self): ... @@ -791,6 +774,7 @@ class Graph(GraphView): properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None, + layer: Optional[str] = None, ) -> MutableNode: """ Adds a new node with the given id and properties to the graph. @@ -801,6 +785,7 @@ class Graph(GraphView): properties (PropInput, optional): The properties of the node. node_type (str, optional): The optional string which will be used as a node type. event_id (int, optional): The optional integer which will be used as an event id. + layer (str, optional): The optional string which will be used as a node layer. Returns: MutableNode: The added node. @@ -830,67 +815,6 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write Graph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. 
If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - def create_node( self, timestamp: TimeInput, @@ -898,6 +822,7 @@ class Graph(GraphView): properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None, + layer: Optional[str] = None, ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -908,7 +833,7 @@ class Graph(GraphView): properties (PropInput, optional): The properties of the node. node_type (str, optional): The optional string which will be used as a node type. event_id (int, optional): The optional integer which will be used as an event id. - + layer (str, optional): The optional string which will be used as a layer. Returns: MutableNode: The created node. @@ -973,6 +898,14 @@ class Graph(GraphView): Graph: the graph with event semantics applied """ + def flush(self) -> None: + """ + Trigger a flush of the underlying storage if disk storage is enabled + + Returns: + None: This function does not return a value, if the operation is successful. + """ + @staticmethod def from_parquet(graph_dir: str | PathLike) -> Graph: """ @@ -1153,18 +1086,17 @@ class Graph(GraphView): """ @staticmethod - def load_cached(path: str) -> Graph: + def load(path: str | PathLike, config: Optional[Config] = None) -> Graph: """ - Load Graph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. + Load a disk graph from path Arguments: - path (str): The path to the cache file + path (str | PathLike): the path of the graph folder + config (Config, optional): specify a new config to override the values saved for the graph + (note that the page sizes cannot be overridden and are ignored) Returns: - Graph: the loaded graph with initialised cache + Graph: the graph """ def load_edge_metadata( @@ -1222,6 +1154,7 @@ class Graph(GraphView): | dict[str, DataType | PropType | str] ] = None, csv_options: Optional[dict[str, str | bool]] = None, + event_id: Optional[str] = None, ) -> None: """ Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), @@ -1241,6 +1174,7 @@ class Graph(GraphView): layer_col (str, optional): The edge layer column name in a dataframe. 
Cannot be used in combination with layer. Defaults to None. schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. + event_id (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1252,7 +1186,7 @@ class Graph(GraphView): @staticmethod def load_from_file(path: str) -> Graph: """ - Load Graph from a file. + Load Graph from a parquet file. Arguments: path (str): The path to the file. @@ -1313,6 +1247,9 @@ class Graph(GraphView): | dict[str, DataType | PropType | str] ] = None, csv_options: Optional[dict[str, str | bool]] = None, + event_id: Optional[str] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, ) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), @@ -1331,6 +1268,9 @@ class Graph(GraphView): shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. + event_id (str, optional): The column name for the secondary index. Defaults to None. + layer (str, optional): A value to use as the layer for all nodes. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The node layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1360,7 +1300,7 @@ class Graph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the Graph to the given path. + Saves the Graph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -1389,7 +1329,7 @@ class Graph(GraphView): def to_parquet(self, graph_dir: str | PathLike) -> None: """ - Persist graph to parquet files. + Persist graph to parquet files Arguments: graph_dir (str | PathLike): the folder where the graph will be persisted as parquet @@ -1412,18 +1352,18 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - class PersistentGraph(GraphView): - """A temporal graph that allows edges and nodes to be deleted.""" + """ + A temporal graph that allows edges and nodes to be deleted. + + Arguments: + path (str | PathLike, optional): The path for persisting the graph (only works with disk storage enabled). Defaults to None. + config (Config, optional): Storage/config overrides. Defaults to None. 
+ """ - def __new__(cls) -> PersistentGraph: + def __new__( + cls, path: Optional[str | PathLike] = None, config: Optional[Config] = None + ) -> PersistentGraph: """Create and return a new object. See help(type) for accurate signature.""" def __reduce__(self): ... @@ -1475,6 +1415,7 @@ class PersistentGraph(GraphView): properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None, + layer: Any = None, ) -> None: """ Adds a new node with the given id and properties to the graph. @@ -1483,8 +1424,9 @@ class PersistentGraph(GraphView): timestamp (TimeInput): The timestamp of the node. id (str | int): The id of the node. properties (PropInput, optional): The properties of the node. - node_type (str, optional) : The optional string which will be used as a node type. + node_type (str, optional): The optional string which will be used as a node type. event_id (int, optional): The optional integer which will be used as an event id. + layer: (str, optional): The optional string which will be used as a layer. Returns: None: This function does not return a value, if the operation is successful. @@ -1511,66 +1453,6 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write PersistentGraph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - def create_node( self, timestamp: TimeInput, @@ -1578,6 +1460,7 @@ class PersistentGraph(GraphView): properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None, + layer: Optional[str] = None, ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -1586,8 +1469,9 @@ class PersistentGraph(GraphView): timestamp (TimeInput): The timestamp of the node. id (str | int): The id of the node. properties (PropInput, optional): The properties of the node. - node_type (str, optional) : The optional string which will be used as a node type. + node_type (str, optional): The optional string which will be used as a node type. event_id (int, optional): The optional integer which will be used as an event id. + layer (str, optional): The optional string which will be used as a layer. 
Returns: MutableNode: the newly created node. @@ -1653,6 +1537,14 @@ class PersistentGraph(GraphView): Graph: the graph with event semantics applied """ + def flush(self) -> None: + """ + Trigger a flush of the underlying storage if disk storage is enabled + + Returns: + None: This function does not return a value, if the operation is successful. + """ + def get_all_node_types(self) -> list[str]: """ Returns all the node types in the graph. @@ -1816,18 +1708,17 @@ class PersistentGraph(GraphView): """ @staticmethod - def load_cached(path: str) -> PersistentGraph: + def load(path: str | PathLike, config: Optional[Config]) -> PersistentGraph: """ - Load PersistentGraph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. + Load a disk graph from path Arguments: - path (str): The path to the cache file + path (str | PathLike): the path of the graph folder + config (Config, optional): specify a new config to override the values saved for the graph + (note that the page sizes cannot be overridden and are ignored) Returns: - PersistentGraph: the loaded graph with initialised cache + PersistentGraph: the graph """ def load_edge_deletions( @@ -1843,6 +1734,7 @@ class PersistentGraph(GraphView): | dict[str, DataType | PropType | str] ] = None, csv_options: Optional[dict[str, str | bool]] = None, + event_id: Optional[str] = None, ) -> None: """ Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), @@ -1859,6 +1751,7 @@ class PersistentGraph(GraphView): layer_col (str, optional): The edge layer col name in the data source. Cannot be used in combination with layer. Defaults to None. schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. + event_id (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1922,6 +1815,7 @@ class PersistentGraph(GraphView): | dict[str, DataType | PropType | str] ] = None, csv_options: Optional[dict[str, str | bool]] = None, + event_id: Optional[str] = None, ) -> None: """ Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), @@ -1941,6 +1835,7 @@ class PersistentGraph(GraphView): layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. + event_id (str, optional): The column name for the secondary index. Returns: None: This function does not return a value if the operation is successful. 
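Taken together, the stubs above replace the old cache/`write_updates` workflow with a disk-backed lifecycle on both `Graph` and `PersistentGraph`: construct with a `path`, call `flush()` to push pending updates, and reopen with `load()`. A minimal sketch of that round trip, assuming disk storage is enabled in the build; the folder path and graph contents are illustrative:

```python
from raphtory import Graph

# Create a graph persisted under a folder (requires disk storage support).
g = Graph(path="/tmp/example_graph")
g.add_edge(1, "a", "b", properties={"weight": 1.0})
g.flush()  # push any buffered updates to the underlying storage

# Reopen the same folder later; a Config passed to load() overrides the
# saved values, except page sizes, which are ignored per the docstring.
g2 = Graph.load("/tmp/example_graph")
assert g2.count_edges() == 1
```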
@@ -1952,7 +1847,7 @@ class PersistentGraph(GraphView): @staticmethod def load_from_file(path: str) -> PersistentGraph: """ - Load PersistentGraph from a file. + Load PersistentGraph from a parquet file. Arguments: path (str): The path to the file. @@ -1961,6 +1856,39 @@ class PersistentGraph(GraphView): PersistentGraph: """ + def load_graph_properties( + self, + data: Any, + time: str, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + schema: Optional[ + list[tuple[str, DataType | PropType | str]] + | dict[str, DataType | PropType | str] + ] = None, + event_id: Optional[str] = None, + ) -> None: + """ + Load graph properties from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + or a path to a Parquet file, or a directory containing multiple Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). + + Arguments: + data (Any): The data source containing graph properties. + time (str): The column name for the update timestamps. + properties (List[str], optional): List of temporal property column names. Defaults to None. + metadata (List[str], optional): List of constant property column names. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + event_id (str, optional): The column name for the secondary index. + + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_node_metadata( self, data: Any, @@ -2013,6 +1941,9 @@ class PersistentGraph(GraphView): | dict[str, DataType | PropType | str] ] = None, csv_options: Optional[dict[str, str | bool]] = None, + event_id: Optional[str] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, ) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), @@ -2031,6 +1962,9 @@ class PersistentGraph(GraphView): shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. + event_id (str, optional): The column name for the secondary index. + layer (str, optional): A value to use as the layer for all nodes. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The node layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -2060,7 +1994,7 @@ class PersistentGraph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the PersistentGraph to the given path. 
+ Saves the PersistentGraph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -2087,6 +2021,17 @@ class PersistentGraph(GraphView): bytes: """ + def to_parquet(self, graph_dir: str | PathLike) -> None: + """ + Persist graph to parquet files + + Arguments: + graph_dir (str | PathLike): the folder where the graph will be persisted as parquet + + Returns: + None: + """ + def update_metadata(self, metadata: dict) -> None: """ Updates metadata of the graph. @@ -2101,14 +2046,6 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - class Node(object): """A node (or node) in the graph.""" @@ -3209,7 +3146,7 @@ class PathFromNode(object): """ @property - def earliest_time(self): + def earliest_time(self) -> OptionEventTimeIterable: """ The earliest time of each node. @@ -3331,7 +3268,7 @@ class PathFromNode(object): """ @property - def id(self): + def id(self) -> GIDIterable: """ The node IDs. @@ -3374,7 +3311,7 @@ class PathFromNode(object): """ @property - def latest_time(self): + def latest_time(self) -> OptionEventTimeIterable: """ The latest time of each node. @@ -3416,7 +3353,7 @@ class PathFromNode(object): """ @property - def name(self): + def name(self) -> StringIterable: """ The node names. @@ -3434,7 +3371,7 @@ class PathFromNode(object): """ @property - def node_type(self): + def node_type(self) -> OptionArcStringIterable: """ The node types. @@ -3687,7 +3624,7 @@ class PathFromGraph(object): PathFromGraph: The layered view """ - def degree(self): + def degree(self) -> NestedUsizeIterable: """ Returns the node degrees. @@ -3696,7 +3633,7 @@ class PathFromGraph(object): """ @property - def earliest_time(self): + def earliest_time(self) -> NestedOptionEventTimeIterable: """ The node earliest times. @@ -3704,7 +3641,7 @@ class PathFromGraph(object): NestedOptionEventTimeIterable: """ - def edge_history_count(self): + def edge_history_count(self) -> NestedUsizeIterable: """ Returns the number of edge updates for each node. @@ -3818,7 +3755,7 @@ class PathFromGraph(object): """ @property - def history(self): + def history(self) -> NestedHistoryIterable: """ Returns a history object for each node with time entries for when a node is added or change to a node is made. @@ -3827,7 +3764,7 @@ class PathFromGraph(object): """ @property - def id(self): + def id(self) -> NestedGIDIterable: """ The node ids @@ -3835,7 +3772,7 @@ class PathFromGraph(object): NestedGIDIterable: """ - def in_degree(self): + def in_degree(self) -> NestedUsizeIterable: """ Returns the node in-degrees. @@ -3870,7 +3807,7 @@ class PathFromGraph(object): """ @property - def latest_time(self): + def latest_time(self) -> NestedOptionEventTimeIterable: """ The node latest times. @@ -3903,7 +3840,7 @@ class PathFromGraph(object): """ @property - def metadata(self): + def metadata(self) -> MetadataListList: """ Returns the node metadata. @@ -3912,7 +3849,7 @@ class PathFromGraph(object): """ @property - def name(self): + def name(self) -> NestedStringIterable: """ The node names. @@ -3930,7 +3867,7 @@ class PathFromGraph(object): """ @property - def node_type(self): + def node_type(self) -> NestedOptionArcStringIterable: """ The node types. @@ -3938,7 +3875,7 @@ class PathFromGraph(object): NestedOptionArcStringIterable: """ - def out_degree(self): + def out_degree(self) -> NestedUsizeIterable: """ Returns the node out-degrees. 
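The `PathFromNode`/`PathFromGraph` accessors above now declare concrete iterable return types (`NestedGIDIterable`, `NestedUsizeIterable`, and friends) instead of untyped returns; behaviour is unchanged, the annotations only make the lazy views checkable. A small sketch of what that looks like from Python, assuming these lazy views are iterable as elsewhere in the API (graph contents illustrative):

```python
from raphtory import Graph

g = Graph()
g.add_edge(1, "a", "b")
g.add_edge(2, "b", "c")

neighbours = g.nodes.neighbours    # PathFromGraph
ids = neighbours.id                # NestedGIDIterable: one id list per node
degrees = neighbours.degree()      # NestedUsizeIterable
for node_ids, node_degrees in zip(ids, degrees):
    print(list(node_ids), list(node_degrees))
```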
@@ -3965,7 +3902,7 @@ class PathFromGraph(object): """ @property - def properties(self): + def properties(self) -> PyNestedPropsIterable: """ Returns the node properties. @@ -4133,6 +4070,7 @@ class MutableNode(Node): t: TimeInput, properties: Optional[PropInput] = None, event_id: Optional[int] = None, + layer: Optional[str] = None, ) -> None: """ Add updates to a node in the graph at a specified time. @@ -4145,6 +4083,7 @@ class MutableNode(Node): is of type Prop representing the property value. If None, no properties are updated. event_id (int, optional): The optional integer which will be used as an event id. + layer (str, optional): The layer this update is recorded under. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -4731,7 +4670,7 @@ class Edges(object): """ @property - def deletions(self): + def deletions(self) -> HistoryIterable: """ Returns a history object for each edge containing their deletion times. @@ -4749,7 +4688,7 @@ class Edges(object): """ @property - def earliest_time(self): + def earliest_time(self) -> OptionEventTimeIterable: """ Returns the earliest time of the edges. @@ -4859,7 +4798,7 @@ class Edges(object): """ @property - def history(self): + def history(self) -> HistoryIterable: """ Returns a history object for each edge containing time entries for when the edge is added or change to the edge is made. @@ -4868,7 +4807,7 @@ class Edges(object): """ @property - def id(self): + def id(self) -> GIDGIDIterable: """ Returns all ids of the edges. @@ -4876,7 +4815,7 @@ class Edges(object): GIDGIDIterable: """ - def is_active(self): + def is_active(self) -> BoolIterable: """ Check if the edges are active (there is at least one update during this time). @@ -4884,7 +4823,7 @@ class Edges(object): BoolIterable: """ - def is_deleted(self): + def is_deleted(self) -> BoolIterable: """ Check if the edges are deleted. @@ -4892,7 +4831,7 @@ class Edges(object): BoolIterable: """ - def is_self_loop(self): + def is_self_loop(self) -> BoolIterable: """ Check if the edges are on the same node. @@ -4900,7 +4839,7 @@ class Edges(object): BoolIterable: """ - def is_valid(self): + def is_valid(self) -> BoolIterable: """ Check if the edges are valid (i.e. not deleted). @@ -4917,7 +4856,7 @@ class Edges(object): """ @property - def latest_time(self): + def latest_time(self) -> OptionEventTimeIterable: """ Returns the latest times of the edges. @@ -4938,7 +4877,7 @@ class Edges(object): """ @property - def layer_name(self): + def layer_name(self) -> ArcStringIterable: """ Get the layer name that all edges belong to - assuming they only belong to one layer @@ -4947,7 +4886,7 @@ class Edges(object): """ @property - def layer_names(self): + def layer_names(self) -> ArcStringVecIterable: """ Get the layer names that all edges belong to - assuming they only belong to one layer. @@ -5098,7 +5037,7 @@ class Edges(object): """ @property - def time(self): + def time(self) -> EventTimeIterable: """ Returns the times of exploded edges @@ -5229,7 +5168,7 @@ class NestedEdges(object): """ @property - def deletions(self): + def deletions(self) -> NestedHistoryIterable: """ Returns a history object for each edge containing their deletion times. @@ -5247,7 +5186,7 @@ class NestedEdges(object): """ @property - def earliest_time(self): + def earliest_time(self) -> NestedOptionEventTimeIterable: """ Returns the earliest time of the edges. 
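The `layer` parameter added above to `add_node`, `create_node`, and `MutableNode.add_updates` lets node history be recorded per layer, mirroring edge layers, and the typed `Prop` constructors documented further below can pin property types explicitly. A short sketch combining the two; the names and values are illustrative, and it assumes `Prop` wrappers are accepted wherever a `PropInput` dictionary is:

```python
from raphtory import Graph, Prop

g = Graph()
n = g.add_node(1, "wallet-1", node_type="wallet", layer="btc")
# Record a later update on the same layer, with an explicitly typed decimal
# balance (a string avoids float rounding, per the Prop.decimal docstring).
n.add_updates(2, properties={"balance": Prop.decimal("10.50")}, layer="btc")
```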
@@ -5357,7 +5296,7 @@ class NestedEdges(object): """ @property - def history(self): + def history(self) -> NestedHistoryIterable: """ Returns a history object for each edge containing time entries for when the edge is added or change to the edge is made. @@ -5366,7 +5305,7 @@ class NestedEdges(object): """ @property - def id(self): + def id(self) -> NestedGIDGIDIterable: """ Returns all ids of the edges. @@ -5374,7 +5313,7 @@ class NestedEdges(object): NestedGIDGIDIterable: """ - def is_active(self): + def is_active(self) -> NestedBoolIterable: """ Check if the edges are active (there is at least one update during this time). @@ -5382,7 +5321,7 @@ class NestedEdges(object): NestedBoolIterable: """ - def is_deleted(self): + def is_deleted(self) -> NestedBoolIterable: """ Check if edges are deleted. @@ -5390,7 +5329,7 @@ class NestedEdges(object): NestedBoolIterable: """ - def is_self_loop(self): + def is_self_loop(self) -> NestedBoolIterable: """ Check if the edges are on the same node. @@ -5398,7 +5337,7 @@ class NestedEdges(object): NestedBoolIterable: """ - def is_valid(self): + def is_valid(self) -> NestedBoolIterable: """ Check if edges are valid (i.e., not deleted). @@ -5415,7 +5354,7 @@ class NestedEdges(object): """ @property - def latest_time(self): + def latest_time(self) -> NestedOptionEventTimeIterable: """ Returns the latest time of the edges. @@ -5436,7 +5375,7 @@ class NestedEdges(object): """ @property - def layer_name(self): + def layer_name(self) -> NestedArcStringIterable: """ Returns the name of the layer the edges belong to - assuming they only belong to one layer. @@ -5445,7 +5384,7 @@ class NestedEdges(object): """ @property - def layer_names(self): + def layer_names(self) -> NestedArcStringVecIterable: """ Returns the names of the layers the edges belong to. @@ -5466,7 +5405,7 @@ class NestedEdges(object): """ @property - def metadata(self): + def metadata(self) -> MetadataListList: """ Get a view of the metadata only. @@ -5484,7 +5423,7 @@ class NestedEdges(object): """ @property - def properties(self): + def properties(self) -> PyNestedPropsIterable: """ Returns all properties of the edges @@ -5596,7 +5535,7 @@ class NestedEdges(object): """ @property - def time(self): + def time(self) -> NestedEventTimeIterable: """ Returns the times of exploded edges. @@ -5852,8 +5791,22 @@ class PyPropValueList(object): PropValue: The average of each property values, or None if count is zero. """ - def collect(self): ... - def count(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def count(self) -> int: + """ + Number of properties (or rows of properties). + + Returns: + int: + """ + def drop_none(self) -> list[PropValue]: """ Drop none. @@ -5935,35 +5888,149 @@ class PropType(object): """Return str(self).""" @staticmethod - def array(p): ... + def bool() -> PropType: + """ + Boolean type. + + Returns: + PropType: + """ + @staticmethod - def bool(): ... + def datetime() -> PropType: + """ + Datetime type (timezone-aware). + + Returns: + PropType: + """ + @staticmethod - def datetime(): ... + def decimal(scale: int) -> PropType: + """ + Arbitrary-precision decimal type with a fixed scale (number of digits + after the decimal point). + + Arguments: + scale (int): the number of digits after the decimal point. + + Returns: + PropType: + """ + @staticmethod - def f32(): ... + def f32() -> PropType: + """ + 32-bit float type. + + Returns: + PropType: + """ + @staticmethod - def f64(): ... 
+ def f64() -> PropType: + """ + 64-bit float type. + + Returns: + PropType: + """ + @staticmethod - def i32(): ... + def i32() -> PropType: + """ + Signed 32-bit integer type. + + Returns: + PropType: + """ + @staticmethod - def i64(): ... + def i64() -> PropType: + """ + Signed 64-bit integer type. + + Returns: + PropType: + """ + @staticmethod - def list(p): ... + def list(p: PropType) -> PropType: + """ + List type with a single element type. + + Arguments: + p (PropType): element type. + + Returns: + PropType: + """ + @staticmethod - def map(hash_map): ... + def map(hash_map: dict[str, PropType]) -> PropType: + """ + Map type with string keys and typed values. + + Arguments: + hash_map (dict[str, PropType]): mapping from key name to value type. + + Returns: + PropType: + """ + @staticmethod - def naive_datetime(): ... + def naive_datetime() -> PropType: + """ + Naive datetime type (timezone-unaware). + + Returns: + PropType: + """ + @staticmethod - def str(): ... + def str() -> PropType: + """ + String type. + + Returns: + PropType: + """ + @staticmethod - def u16(): ... + def u16() -> PropType: + """ + Unsigned 16-bit integer type. + + Returns: + PropType: + """ + @staticmethod - def u32(): ... + def u32() -> PropType: + """ + Unsigned 32-bit integer type. + + Returns: + PropType: + """ + @staticmethod - def u64(): ... + def u64() -> PropType: + """ + Unsigned 64-bit integer type. + + Returns: + PropType: + """ + @staticmethod - def u8(): ... + def u8() -> PropType: + """ + Unsigned 8-bit integer type. + + Returns: + PropType: + """ class Metadata(object): """A view of metadata of an entity""" @@ -6074,11 +6141,49 @@ class MetadataView(object): def __ne__(self, value): """Return self!=value.""" - def as_dict(self): ... - def get(self, key): ... - def items(self): ... - def keys(self): ... - def values(self): ... + def as_dict(self) -> dict[str, list]: + """ + Materialise the metadata as a plain dict mapping each key to the + list of values seen across the underlying entities. + + Returns: + dict[str, list]: + """ + + def get(self, key: str) -> Optional[PyPropValueList]: + """ + Look up a metadata value by key. + + Arguments: + key (str): metadata key. + + Returns: + Optional[PyPropValueList]: + """ + + def items(self) -> list[tuple[str, PyPropValueList]]: + """ + Pairs of `(key, value list)` for every metadata key. + + Returns: + list[tuple[str, PyPropValueList]]: + """ + + def keys(self) -> list[str]: + """ + Metadata keys present across the underlying entities. + + Returns: + list[str]: + """ + + def values(self) -> list[PyPropValueList]: + """ + Metadata values aligned with `keys()`. + + Returns: + list[PyPropValueList]: + """ class TemporalProperties(object): """A view of the temporal properties of an entity""" @@ -6234,12 +6339,12 @@ class PropertiesView(object): """ @property - def temporal(self): + def temporal(self) -> list[TemporalProperty]: """ Get a view of the temporal properties only. Returns: - List[TemporalProp]: + list[TemporalProperty]: """ def values(self) -> list[list[PropValue]]: @@ -6388,18 +6493,18 @@ class TemporalProperty(object): Optional[PropValue]: """ - def values(self): + def values(self) -> NDArray: """ Get the property values for each update. Returns: - NumpyArray: + NDArray: a numpy array of values, one per update. """ class EventTime(object): """ - Raphtory’s EventTime. - Represents a unique timepoint in the graph’s history as (timestamp, event_id). + Raphtory's EventTime. 
+ Represents a unique timepoint in the graph's history as (timestamp, event_id). - timestamp: Number of milliseconds since the Unix epoch. - event_id: ID used for ordering between equal timestamps. @@ -6409,6 +6514,10 @@ class EventTime(object): EventTime can be converted into a timestamp or a Python datetime, and compared either by timestamp (against ints/floats/datetimes/strings), by tuple of (timestamp, event_id), or against another EventTime. + + Arguments: + timestamp (int | float | datetime | str): A time input convertible to an EventTime. + event_id (int | float | datetime | str | None): Optionally, specify the event id. Defaults to None. """ def __eq__(self, value): @@ -6435,7 +6544,11 @@ class EventTime(object): def __ne__(self, value): """Return self!=value.""" - def __new__(cls, timestamp, event_id=None) -> EventTime: + def __new__( + cls, + timestamp: int | float | datetime | str, + event_id: int | float | datetime | str | None = None, + ) -> EventTime: """Create and return a new object. See help(type) for accurate signature.""" def __repr__(self): @@ -6512,41 +6625,41 @@ class OptionalEventTime(object): """Return self!=value.""" @property - def as_tuple(self): + def as_tuple(self) -> Optional[tuple[int, int]]: """ Return this entry as a tuple of (timestamp, event_id), where the timestamp is in milliseconds if an EventTime is contained, or else None. Returns: - tuple[int,int] | None: (timestamp, event_id). + Optional[tuple[int, int]]: (timestamp, event_id). """ @property - def dt(self): + def dt(self) -> Optional[datetime]: """ Returns the UTC datetime representation of this EventTime's timestamp if an EventTime is contained, or else None. Returns: - datetime | None: The UTC datetime. + Optional[datetime]: The UTC datetime. Raises: TimeError: Returns TimeError on timestamp conversion errors (e.g. out-of-range timestamp). """ @property - def event_id(self): + def event_id(self) -> Optional[int]: """ Returns the event id used to order events within the same timestamp if an EventTime is contained, or else None. Returns: - int | None: The event id. + Optional[int]: The event id. """ - def get_event_time(self): + def get_event_time(self) -> Optional[EventTime]: """ Returns the contained EventTime if it exists, or else None. Returns: - EventTime | None: + Optional[EventTime]: """ def is_none(self) -> bool: @@ -6566,12 +6679,12 @@ class OptionalEventTime(object): """ @property - def t(self): + def t(self) -> Optional[int]: """ Returns the timestamp in milliseconds since the Unix epoch if an EventTime is contained, or else None. Returns: - int | None: Milliseconds since the Unix epoch. + Optional[int]: Milliseconds since the Unix epoch. """ class History(object): @@ -7060,179 +7173,222 @@ class WindowSet(object): Iterable: The time index. """ -class IndexSpecBuilder(object): - def __new__(cls, graph) -> IndexSpecBuilder: - """Create and return a new object. See help(type) for accurate signature.""" +class Prop(object): + def __eq__(self, value): + """Return self==value.""" + + def __ge__(self, value): + """Return self>=value.""" + + def __gt__(self, value): + """Return self>value.""" + + def __hash__(self): + """Return hash(self).""" + + def __le__(self, value): + """Return self<=value.""" + + def __lt__(self, value): + """Return self IndexSpec: + def __repr__(self): + """Return repr(self).""" + + @staticmethod + def aware_datetime(value: datetime) -> Prop: """ - Return a spec + Construct a `Prop` holding a timezone-aware datetime (stored as UTC). 
+ Naive datetimes are accepted and interpreted as UTC, matching the + convention used elsewhere in Raphtory's time inputs. + + Arguments: + value (datetime): a datetime. Naive datetimes are treated as UTC. Returns: - IndexSpec: + Prop: """ - def with_all_edge_metadata(self) -> dict[str, Any]: + @staticmethod + def bool(value: bool) -> Prop: """ - Adds all edge metadata to the spec. + Construct a `Prop` holding a boolean. + + Arguments: + value (bool): the value to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_all_edge_properties(self) -> dict[str, Any]: + @staticmethod + def decimal(value: Decimal | str | int | float) -> Prop: """ - Adds all edge properties to the spec. + Construct a `Prop` holding an arbitrary-precision decimal. + + Arguments: + value (Decimal | str | int | float): the value to wrap. Strings must + parse as a decimal. Note that floats only have ~15-17 digits of + precision — pass a string or `decimal.Decimal` for higher precision. Returns: - dict[str, Any]: + Prop: """ - def with_all_edge_properties_and_metadata(self) -> dict[str, Any]: + def dtype(self) -> PropType: """ - Adds all edge properties and metadata to the spec. + Returns the `PropType` of the wrapped value. Returns: - dict[str, Any]: + PropType: """ - def with_all_node_metadata(self) -> dict[str, Any]: + @staticmethod + def f32(value: float) -> Prop: """ - Adds all node metadata to the spec. + Construct a `Prop` holding a 32-bit float. + + Arguments: + value (float): the value to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_all_node_properties(self) -> dict[str, Any]: + @staticmethod + def f64(value: float) -> Prop: """ - Adds all node properties to the spec. + Construct a `Prop` holding a 64-bit float. + + Arguments: + value (float): the value to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_all_node_properties_and_metadata(self) -> dict[str, Any]: + @staticmethod + def i32(value: int) -> Prop: """ - Adds all node properties and metadata to the spec. + Construct a `Prop` holding a signed 32-bit integer. + + Arguments: + value (int): the value to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_edge_metadata(self, props: Any) -> dict[str, Any]: + @staticmethod + def i64(value: int) -> Prop: """ - Adds specified edge metadata to the spec. + Construct a `Prop` holding a signed 64-bit integer. Arguments: - props: List of metadata. + value (int): the value to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_edge_properties(self, props: Any) -> dict[str, Any]: + @staticmethod + def list(values: list) -> Prop: """ - Adds specified edge properties to the spec. + Construct a `Prop` holding a list of values. Arguments: - props: List of properties. + values (list): the values to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_node_metadata(self, props: Any) -> dict[str, Any]: + @staticmethod + def map(dict: dict[str, Any]) -> Prop: """ - Adds specified node metadata to the spec. + Construct a `Prop` holding a string-keyed map of values. Arguments: - props: list of metadata. + dict (dict[str, Any]): the map to wrap. Returns: - dict[str, Any]: + Prop: """ - def with_node_properties(self, props: Any) -> dict[str, Any]: + @staticmethod + def naive_datetime(value: datetime) -> Prop: """ - Adds specified node properties to the spec. + Construct a `Prop` holding a naive (timezone-unaware) datetime. Arguments: - props: list of properties. + value (datetime): the value to wrap (any tz info is dropped). 
Returns: - dict[str, Any]: + Prop: """ -class IndexSpec(object): - def __repr__(self): - """Return repr(self).""" - - @property - def edge_metadata(self) -> list[str]: + @staticmethod + def str(value: str) -> Prop: """ - Get edge metadata. + Construct a `Prop` holding a string. + + Arguments: + value (str): the value to wrap. Returns: - list[str]: + Prop: """ - @property - def edge_properties(self) -> list[str]: + @staticmethod + def u16(value: int) -> Prop: """ - Get edge properties. + Construct a `Prop` holding an unsigned 16-bit integer. + + Arguments: + value (int): the value to wrap. Returns: - list[str]: + Prop: """ - @property - def node_metadata(self) -> list[str]: + @staticmethod + def u32(value: int) -> Prop: """ - Get node metadata. + Construct a `Prop` holding an unsigned 32-bit integer. + + Arguments: + value (int): the value to wrap. Returns: - list[str]: + Prop: """ - @property - def node_properties(self) -> list[str]: + @staticmethod + def u64(value: int) -> Prop: """ - Get node properties. + Construct a `Prop` holding an unsigned 64-bit integer. + + Arguments: + value (int): the value to wrap. Returns: - list[str]: + Prop: """ -class Prop(object): - def __repr__(self): - """Return repr(self).""" - - @staticmethod - def bool(value): ... - def dtype(self): ... - @staticmethod - def f32(value): ... - @staticmethod - def f64(value): ... - @staticmethod - def i32(value): ... - @staticmethod - def i64(value): ... @staticmethod - def list(values): ... - @staticmethod - def map(dict): ... - @staticmethod - def str(value): ... - @staticmethod - def u16(value): ... - @staticmethod - def u32(value): ... - @staticmethod - def u64(value): ... - @staticmethod - def u8(value): ... + def u8(value: int) -> Prop: + """ + Construct a `Prop` holding an unsigned 8-bit integer. + + Arguments: + value (int): the value to wrap. + + Returns: + Prop: + """ def version() -> str: """ diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index c185492cf8..03a058a0cc 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -19,13 +19,17 @@ import raphtory.filter as filter from raphtory.vectors import * from raphtory.node_state import * from raphtory.graphql import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * @@ -204,7 +208,7 @@ def directed_graph_density(graph: GraphView) -> float: float: Directed graph density of graph. """ -def degree_centrality(graph: GraphView): +def degree_centrality(graph: GraphView) -> OutputNodeState: """ Computes the degree centrality of all nodes in the graph. The values are normalized by dividing each result with the maximum possible degree. Graphs with self-loops can have @@ -214,10 +218,10 @@ def degree_centrality(graph: GraphView): graph (GraphView): The graph view on which the operation is to be performed. Returns: - PyOutputNodeState: NodeState mapping nodes to their associated degree centrality. + OutputNodeState: NodeState mapping nodes to their associated degree centrality. 
""" -def alternating_mask(graph: GraphView): +def alternating_mask(graph: GraphView) -> OutputNodeState: """ Alternating mask algorithm. It is a mock algorithm suitable only for testing purposes. @@ -225,7 +229,7 @@ def alternating_mask(graph: GraphView): graph (GraphView): The graph view on which the operation is to be performed. Returns: - PyOutputNodeState: NodeState mapping nodes to their associated alternating masks. + OutputNodeState: NodeState mapping nodes to their associated alternating masks. """ def max_degree(graph: GraphView) -> int: @@ -300,7 +304,7 @@ def pagerank( max_diff: Optional[float] = None, use_l2_norm: bool = True, damping_factor: float = 0.85, -): +) -> OutputNodeState: """ Pagerank -- pagerank centrality value of the nodes in a graph @@ -318,7 +322,7 @@ def pagerank( damping_factor (float): The damping factor for the PageRank calculation. Defaults to 0.85. Returns: - PyOutputNodeState: NodeState mapping nodes to their pagerank score. + OutputNodeState: NodeState mapping nodes to their pagerank score. """ def single_source_shortest_path( @@ -409,7 +413,7 @@ def local_clustering_coefficient(graph: GraphView, v: NodeInput) -> float: float: the local clustering coefficient of node v in graph. """ -def local_clustering_coefficient_batch(graph: Any, v: Any = None): +def local_clustering_coefficient_batch(graph: Any, v: Any = None) -> OutputNodeState: """ Returns the Local clustering coefficient (batch, intersection) for each specified node in a graph. This measures the degree to which one or multiple nodes in a graph tend to cluster together. @@ -420,7 +424,7 @@ def local_clustering_coefficient_batch(graph: Any, v: Any = None): v: vec of node ids, if empty, will return results for every node in the graph Returns: - PyOutputNodeState: Mapping of vertices to lcc score + OutputNodeState: Mapping of vertices to lcc score """ def weakly_connected_components(graph: GraphView) -> NodeStateUsize: @@ -437,7 +441,7 @@ def weakly_connected_components(graph: GraphView) -> NodeStateUsize: NodeStateUsize: Mapping of nodes to their component ids. """ -def strongly_connected_components(graph: GraphView): +def strongly_connected_components(graph: GraphView) -> OutputNodeState: """ Strongly connected components @@ -447,7 +451,7 @@ def strongly_connected_components(graph: GraphView): graph (GraphView): Raphtory graph Returns: - PyOutputNodeState: NodeState mapping nodes to their component ids + OutputNodeState: NodeState mapping nodes to their component ids """ def in_components( @@ -596,7 +600,7 @@ def global_temporal_three_node_motif_multi( """ def local_temporal_three_node_motifs( - graph: GraphView, delta: int, threads=None + graph: GraphView, delta: int, threads: Optional[int] = None ) -> NodeStateMotifs: """ Computes the number of each type of motif that each node participates in. See global_temporal_three_node_motifs for a summary of the motifs involved. @@ -604,6 +608,7 @@ def local_temporal_three_node_motifs( Arguments: graph (GraphView): A directed raphtory graph delta (int): Maximum time difference between the first and last edge of the motif. NB if time for edges was given as a UNIX epoch, this should be given in seconds, otherwise milliseconds should be used (if edge times were given as string) + threads (int, optional): Number of threads to use. Defaults to None. Returns: NodeStateMotifs: A mapping from nodes to lists of motif counts (40 counts in the same order as the global motif counts) with the number of each motif that node participates in. 
@@ -656,18 +661,18 @@ def balance( """ def label_propagation( - graph: GraphView, iter_count: Any = 20, seed: Optional[bytes] = None -): + graph: GraphView, iter_count: int = 20, seed: Optional[bytes] = None +) -> OutputNodeState: """ Computes components using a label propagation algorithm Arguments: graph (GraphView): A reference to the graph - iter_count: Number of iterations + iter_count (int): Number of iterations. Defaults to 20. seed (bytes, optional): Array of 32 bytes of u8 which is set as the rng seed Returns: - PyOutputNodeState: NodeState mapping nodes to community id + OutputNodeState: NodeState mapping nodes to community id """ diff --git a/python/python/raphtory/filter/__init__.pyi b/python/python/raphtory/filter/__init__.pyi index 9eb7ac3078..05eb32788e 100644 --- a/python/python/raphtory/filter/__init__.pyi +++ b/python/python/raphtory/filter/__init__.pyi @@ -15,13 +15,17 @@ from raphtory.algorithms import * from raphtory.vectors import * from raphtory.node_state import * from raphtory.graphql import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * @@ -89,14 +93,29 @@ class FilterOps(object): def __ne__(self, value): """Return self!=value.""" - def all(self): - """Requires that **all** elements match when the underlying property is list-like.""" + def all(self) -> filter.PropertyFilterOps: + """ + Requires that **all** elements match when the underlying property is list-like. + + Returns: + filter.PropertyFilterOps: + """ - def any(self): - """Requires that **any** element matches when the underlying property is list-like.""" + def any(self) -> filter.PropertyFilterOps: + """ + Requires that **any** element matches when the underlying property is list-like. - def avg(self): - """Averages list elements when the underlying property is numeric and list-like.""" + Returns: + filter.PropertyFilterOps: + """ + + def avg(self) -> filter.PropertyFilterOps: + """ + Averages list elements when the underlying property is numeric and list-like. + + Returns: + filter.PropertyFilterOps: + """ def contains(self, value: Prop) -> filter.FilterExpr: """ @@ -120,8 +139,13 @@ class FilterOps(object): filter.FilterExpr: A filter expression evaluating suffix matching. """ - def first(self): - """Selects the first element when the underlying property is list-like.""" + def first(self) -> filter.PropertyFilterOps: + """ + Selects the first element when the underlying property is list-like. + + Returns: + filter.PropertyFilterOps: + """ def fuzzy_search( self, prop_value: str, levenshtein_distance: int, prefix_match: bool @@ -178,17 +202,37 @@ class FilterOps(object): filter.FilterExpr: A filter expression evaluating `value is not None`. """ - def last(self): - """Selects the last element when the underlying property is list-like.""" + def last(self) -> filter.PropertyFilterOps: + """ + Selects the last element when the underlying property is list-like. 
- def len(self): - """Returns the list length when the underlying property is list-like.""" + Returns: + filter.PropertyFilterOps: + """ - def max(self): - """Returns the maximum list element when the underlying property is list-like.""" + def len(self) -> filter.PropertyFilterOps: + """ + Returns the list length when the underlying property is list-like. - def min(self): - """Returns the minimum list element when the underlying property is list-like.""" + Returns: + filter.PropertyFilterOps: + """ + + def max(self) -> filter.PropertyFilterOps: + """ + Returns the maximum list element when the underlying property is list-like. + + Returns: + filter.PropertyFilterOps: + """ + + def min(self) -> filter.PropertyFilterOps: + """ + Returns the minimum list element when the underlying property is list-like. + + Returns: + filter.PropertyFilterOps: + """ def not_contains(self, value: Prop) -> filter.FilterExpr: """ @@ -212,8 +256,13 @@ class FilterOps(object): filter.FilterExpr: A filter expression evaluating prefix matching. """ - def sum(self): - """Sums list elements when the underlying property is numeric and list-like.""" + def sum(self) -> filter.PropertyFilterOps: + """ + Sums list elements when the underlying property is numeric and list-like. + + Returns: + filter.PropertyFilterOps: + """ class PropertyFilterOps(FilterOps): """ @@ -244,7 +293,7 @@ class Node(object): """ @staticmethod - def after(time: int): + def after(time: int) -> filter.NodeViewPropsFilterBuilder: """ Restricts node evaluation to times strictly after the given time. @@ -252,11 +301,11 @@ class Node(object): time (int): Lower time bound. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def at(time: int): + def at(time: int) -> filter.NodeViewPropsFilterBuilder: """ Restricts node evaluation to a single point in time. @@ -264,11 +313,11 @@ class Node(object): time (int): Event time. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def before(time: int): + def before(time: int) -> filter.NodeViewPropsFilterBuilder: """ Restricts node evaluation to times strictly before the given time. @@ -276,40 +325,51 @@ class Node(object): time (int): Upper time bound. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def by_state_column(state, col): ... + def by_state_column(state: OutputNodeState, col: str) -> filter.FilterExpr: + """ + Build a node filter from a boolean column of an existing node-state result. + + Arguments: + state (OutputNodeState): A pre-computed node state (e.g. from an algorithm). + col (str): Name of the boolean column on `state` whose values determine inclusion. + + Returns: + filter.FilterExpr: + """ + @staticmethod - def id(): + def id() -> filter.NodeIdFilterBuilder: """ Selects the node ID field for filtering. Returns: - filter.NodeIdFilterBuilder + filter.NodeIdFilterBuilder: """ @staticmethod - def is_active(): + def is_active() -> filter.FilterExpr: """ Matches nodes that have at least one event in the current view. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def latest(): + def latest() -> filter.NodeViewPropsFilterBuilder: """ Evaluates filters against the latest available state of each node. 
Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def layer(layer: str): + def layer(layer: str) -> filter.NodeViewPropsFilterBuilder: """ Restricts evaluation to nodes belonging to the given layer. @@ -317,11 +377,11 @@ class Node(object): layer (str): Layer name. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def layers(layers: list[str]): + def layers(layers: list[str]) -> filter.NodeViewPropsFilterBuilder: """ Restricts evaluation to nodes belonging to any of the given layers. @@ -329,11 +389,11 @@ class Node(object): layers (list[str]): Layer names. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def metadata(name: str): + def metadata(name: str) -> filter.FilterOps: """ Filters a node metadata field by name. @@ -343,29 +403,29 @@ class Node(object): name (str): Metadata key. Returns: - filter.FilterOps + filter.FilterOps: """ @staticmethod - def name(): + def name() -> filter.NodeNameFilterBuilder: """ Selects the node name field for filtering. Returns: - filter.NodeNameFilterBuilder + filter.NodeNameFilterBuilder: """ @staticmethod - def node_type(): + def node_type() -> filter.NodeTypeFilterBuilder: """ Selects the node type field for filtering. Returns: - filter.NodeTypeFilterBuilder + filter.NodeTypeFilterBuilder: """ @staticmethod - def property(name: str): + def property(name: str) -> filter.PropertyFilterOps: """ Filters a node property by name. @@ -375,11 +435,11 @@ class Node(object): name (str): Property key. Returns: - filter.PropertyFilterOps + filter.PropertyFilterOps: """ @staticmethod - def snapshot_at(time: int): + def snapshot_at(time: int) -> filter.NodeViewPropsFilterBuilder: """ Evaluates filters against a snapshot of the graph at a given time. @@ -387,20 +447,20 @@ class Node(object): time (int): Snapshot time. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def snapshot_latest(): + def snapshot_latest() -> filter.NodeViewPropsFilterBuilder: """ Evaluates filters against the most recent snapshot of the graph. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ @staticmethod - def window(start: int, end: int): + def window(start: int, end: int) -> filter.NodeViewPropsFilterBuilder: """ Restricts node evaluation to the given time window. @@ -411,7 +471,7 @@ class Node(object): end (int): End time. Returns: - filter.NodeViewPropsFilterBuilder + filter.NodeViewPropsFilterBuilder: """ class NodeIdFilterBuilder(object): @@ -791,7 +851,7 @@ class Edge(object): """ @staticmethod - def after(time: int): + def after(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts edge evaluation to times strictly after the given time. @@ -799,11 +859,11 @@ class Edge(object): time (int): Lower time bound. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def at(time: int): + def at(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts edge evaluation to a single point in time. @@ -811,11 +871,11 @@ class Edge(object): time (int): Event time. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def before(time: int): + def before(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts edge evaluation to times strictly before the given time. @@ -823,65 +883,65 @@ class Edge(object): time (int): Upper time bound. 
Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def dst(): + def dst() -> filter.EdgeEndpoint: """ Selects the edge **destination endpoint** for filtering. Returns: - filter.EdgeEndpoint + filter.EdgeEndpoint: """ @staticmethod - def is_active(): + def is_active() -> filter.FilterExpr: """ Matches edges that have at least one event in the current view. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_deleted(): + def is_deleted() -> filter.FilterExpr: """ Matches edges that have been deleted. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_self_loop(): + def is_self_loop() -> filter.FilterExpr: """ Matches edges that are self-loops (source == destination). Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_valid(): + def is_valid() -> filter.FilterExpr: """ Matches edges that are structurally valid in the current view. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def latest(): + def latest() -> filter.EdgeViewPropsFilterBuilder: """ Evaluates edge predicates against the latest available edge state. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def layer(layer: str): + def layer(layer: str) -> filter.EdgeViewPropsFilterBuilder: """ Restricts evaluation to edges belonging to the given layer. @@ -889,11 +949,11 @@ class Edge(object): layer (str): Layer name. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def layers(layers: list[str]): + def layers(layers: list[str]) -> filter.EdgeViewPropsFilterBuilder: """ Restricts evaluation to edges belonging to any of the given layers. @@ -901,11 +961,11 @@ class Edge(object): layers (list[str]): Layer names. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def metadata(name: str): + def metadata(name: str) -> filter.FilterOps: """ Filters an edge metadata field by name. @@ -915,11 +975,11 @@ class Edge(object): name (str): Metadata key. Returns: - filter.FilterOps + filter.FilterOps: """ @staticmethod - def property(name: str): + def property(name: str) -> filter.PropertyFilterOps: """ Filters an edge property by name. @@ -929,11 +989,11 @@ class Edge(object): name (str): Property key. Returns: - filter.PropertyFilterOps + filter.PropertyFilterOps: """ @staticmethod - def snapshot_at(time: int): + def snapshot_at(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Evaluates edge predicates against a snapshot of the graph at a given time. @@ -941,29 +1001,29 @@ class Edge(object): time (int): Snapshot time. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def snapshot_latest(): + def snapshot_latest() -> filter.EdgeViewPropsFilterBuilder: """ Evaluates edge predicates against the most recent snapshot of the graph. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def src(): + def src() -> filter.EdgeEndpoint: """ Selects the edge **source endpoint** for filtering. Returns: - filter.EdgeEndpoint + filter.EdgeEndpoint: """ @staticmethod - def window(start: int, end: int): + def window(start: int, end: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts edge evaluation to the given time window. @@ -974,7 +1034,7 @@ class Edge(object): end (int): End time. 
Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ class EdgeEndpoint(object): @@ -991,15 +1051,15 @@ class EdgeEndpoint(object): Edge.src().property("country") == "UK" """ - def id(self): + def id(self) -> filter.EdgeEndpointIdFilter: """ Selects the endpoint node ID field for filtering. Returns: - filter.EdgeEndpointIdFilter + filter.EdgeEndpointIdFilter: """ - def metadata(self, name: str): + def metadata(self, name: str) -> filter.FilterOps: """ Filters an endpoint node metadata field by name. @@ -1009,26 +1069,26 @@ class EdgeEndpoint(object): name (str): Metadata key. Returns: - filter.FilterOps + filter.FilterOps: """ - def name(self): + def name(self) -> filter.EdgeEndpointNameFilter: """ Selects the endpoint node name field for filtering. Returns: - filter.EdgeEndpointNameFilter + filter.EdgeEndpointNameFilter: """ - def node_type(self): + def node_type(self) -> filter.EdgeEndpointTypeFilter: """ Selects the endpoint node type field for filtering. Returns: - filter.EdgeEndpointTypeFilter + filter.EdgeEndpointTypeFilter: """ - def property(self, name: str): + def property(self, name: str) -> filter.PropertyFilterOps: """ Filters an endpoint node property by name. @@ -1038,7 +1098,7 @@ class EdgeEndpoint(object): name (str): Property key. Returns: - filter.PropertyFilterOps + filter.PropertyFilterOps: """ class EdgeEndpointIdFilter(object): @@ -1415,7 +1475,7 @@ class ExplodedEdge(object): """ @staticmethod - def after(time: int): + def after(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts exploded edge evaluation to times strictly after the given time. @@ -1423,11 +1483,11 @@ class ExplodedEdge(object): time (int): Lower time bound. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def at(time: int): + def at(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts exploded edge evaluation to a single point in time. @@ -1435,11 +1495,11 @@ class ExplodedEdge(object): time (int): Event time. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def before(time: int): + def before(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts exploded edge evaluation to times strictly before the given time. @@ -1447,56 +1507,56 @@ class ExplodedEdge(object): time (int): Upper time bound. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def is_active(): + def is_active() -> filter.FilterExpr: """ Matches exploded edges that have at least one event in the current view. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_deleted(): + def is_deleted() -> filter.FilterExpr: """ Matches exploded edges that have been deleted. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_self_loop(): + def is_self_loop() -> filter.FilterExpr: """ Matches exploded edges that are self-loops (source == destination). Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def is_valid(): + def is_valid() -> filter.FilterExpr: """ Matches exploded edges that are structurally valid in the current view. Returns: - filter.FilterExpr + filter.FilterExpr: """ @staticmethod - def latest(): + def latest() -> filter.EdgeViewPropsFilterBuilder: """ Evaluates exploded edge predicates against the latest available state. 
Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def layer(layer: str): + def layer(layer: str) -> filter.EdgeViewPropsFilterBuilder: """ Restricts evaluation to exploded edges belonging to the given layer. @@ -1504,11 +1564,11 @@ class ExplodedEdge(object): layer (str): Layer name. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def layers(layers: list[str]): + def layers(layers: list[str]) -> filter.EdgeViewPropsFilterBuilder: """ Restricts evaluation to exploded edges belonging to any of the given layers. @@ -1516,11 +1576,11 @@ class ExplodedEdge(object): layers (list[str]): Layer names. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def metadata(name: str): + def metadata(name: str) -> filter.FilterOps: """ Filters an exploded edge metadata field by name. @@ -1530,11 +1590,11 @@ class ExplodedEdge(object): name (str): Metadata key. Returns: - filter.FilterOps + filter.FilterOps: """ @staticmethod - def property(name: str): + def property(name: str) -> filter.PropertyFilterOps: """ Filters an exploded edge property by name. @@ -1544,11 +1604,11 @@ class ExplodedEdge(object): name (str): Property key. Returns: - filter.PropertyFilterOps + filter.PropertyFilterOps: """ @staticmethod - def snapshot_at(time: int): + def snapshot_at(time: int) -> filter.EdgeViewPropsFilterBuilder: """ Evaluates exploded edge predicates against a snapshot of the graph at a given time. @@ -1556,20 +1616,20 @@ class ExplodedEdge(object): time (int): Snapshot time. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def snapshot_latest(): + def snapshot_latest() -> filter.EdgeViewPropsFilterBuilder: """ Evaluates exploded edge predicates against the most recent snapshot of the graph. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ @staticmethod - def window(start: int, end: int): + def window(start: int, end: int) -> filter.EdgeViewPropsFilterBuilder: """ Restricts exploded edge evaluation to the given time window. @@ -1580,7 +1640,7 @@ class ExplodedEdge(object): end (int): End time. Returns: - filter.EdgeViewPropsFilterBuilder + filter.EdgeViewPropsFilterBuilder: """ class Graph(object): @@ -1603,7 +1663,7 @@ class Graph(object): """ @staticmethod - def after(time: int): + def after(time: int) -> filter.ViewFilterBuilder: """ Restricts evaluation to times strictly after the given time. @@ -1611,11 +1671,11 @@ class Graph(object): time (int): Lower time bound. Returns: - filter.ViewFilterBuilder + filter.ViewFilterBuilder: """ @staticmethod - def at(time: int): + def at(time: int) -> filter.ViewFilterBuilder: """ Restricts evaluation to a single point in time. @@ -1623,11 +1683,11 @@ class Graph(object): time (int): Event time. Returns: - filter.ViewFilterBuilder + filter.ViewFilterBuilder: """ @staticmethod - def before(time: int): + def before(time: int) -> filter.ViewFilterBuilder: """ Restricts evaluation to times strictly before the given time. @@ -1635,20 +1695,20 @@ class Graph(object): time (int): Upper time bound. Returns: - filter.ViewFilterBuilder + filter.ViewFilterBuilder: """ @staticmethod - def latest(): + def latest() -> filter.ViewFilterBuilder: """ Evaluates filters against the latest available state of the graph. 

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """

     @staticmethod
-    def layer(layer: str):
+    def layer(layer: str) -> filter.ViewFilterBuilder:
         """
         Restricts evaluation to a single layer.

@@ -1656,11 +1716,11 @@ class Graph(object):
             layer (str): Layer name.

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """

     @staticmethod
-    def layers(layers: list[str]):
+    def layers(layers: list[str]) -> filter.ViewFilterBuilder:
         """
         Restricts evaluation to any of the given layers.

@@ -1668,11 +1728,11 @@ class Graph(object):
             layers (list[str]): Layer names.

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """

     @staticmethod
-    def snapshot_at(time: int):
+    def snapshot_at(time: int) -> filter.ViewFilterBuilder:
         """
         Evaluates filters against a snapshot of the graph at a given time.

@@ -1680,20 +1740,20 @@ class Graph(object):
             time (int): Snapshot time.

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """

     @staticmethod
-    def snapshot_latest():
+    def snapshot_latest() -> filter.ViewFilterBuilder:
         """
         Evaluates filters against the most recent snapshot of the graph.

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """

     @staticmethod
-    def window(start: int, end: int):
+    def window(start: int, end: int) -> filter.ViewFilterBuilder:
         """
         Restricts evaluation to events within a time window.

@@ -1704,5 +1764,5 @@ class Graph(object):
             end (int): End time.

         Returns:
-            filter.ViewFilterBuilder
+            filter.ViewFilterBuilder:
         """
diff --git a/python/python/raphtory/gql/__init__.pyi b/python/python/raphtory/gql/__init__.pyi
new file mode 100644
index 0000000000..52dc70413d
--- /dev/null
+++ b/python/python/raphtory/gql/__init__.pyi
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+###############################################################################
+#                                                                             #
+#                         AUTOGENERATED TYPE STUB FILE                        #
+#                                                                             #
+#     This file was automatically generated. Do not modify it directly.      #
+#     Any changes made here may be lost when the file is regenerated.        #
+#                                                                             #
+###############################################################################
+
+from typing import *
+from raphtory import *
+import raphtory.filter as filter
+from raphtory.algorithms import *
+from raphtory.vectors import *
+from raphtory.node_state import *
+from raphtory.graphql import *
+from raphtory.typing import *
+import numpy as np
+from numpy.typing import NDArray
+from datetime import datetime
+import pandas
+from pandas import DataFrame
+import pyarrow  # type: ignore[import-untyped]
+from pyarrow import DataType  # type: ignore[import-untyped]
+from os import PathLike
+from decimal import Decimal
+import networkx as nx  # type: ignore
+import pyvis  # type: ignore
+from raphtory.iterables import *
+
+__all__ = ["gql", "register_procedure", "GqlResult", "GqlRow"]
+
+def gql(graph: Any, query: Any, params: Any = None) -> GqlResult:
+    """
+    Execute a GQL query against a Raphtory graph view.
+
+    Arguments:
+        graph: A Raphtory GraphView to query
+        query: A GQL query string
+        params: Optional parameters for the query. Defaults to None.
+
+    Returns:
+        GqlResult: A result object with table display, pandas conversion, and iteration.
+    """
+
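# A minimal usage sketch (illustrative, not part of the patch) for gql()
# above; the query text is a hypothetical GQL string and only the
# gql/GqlResult surface declared in this stub is assumed.
from raphtory import Graph
from raphtory.gql import gql

g = Graph()
g.add_edge(1, "a", "b")

result = gql(g, "MATCH (n) RETURN n")  # hypothetical query text
print(result.columns, result.num_rows())
df = result.to_df()  # pandas DataFrame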
+def register_procedure(
+    graph: Any, name: Any, input_params: Any, output_params: Any, data: Any
+) -> None:
+    """
+    Register a mock procedure with the cached coordinator.
+
+    Arguments:
+        graph: A Raphtory GraphView (needed to ensure coordinator is initialized)
+        name: Procedure name (e.g., "test.my.proc")
+        input_params: List of input parameter names
+        output_params: List of output parameter names
+        data: List of dicts, each dict maps column name -> value
+
+    Returns:
+        None:
+    """
+
+class GqlResult(object):
+    """GQL query result with table display, pandas conversion, and iteration."""
+
+    def __getitem__(self, key):
+        """Return self[key]."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __repr__(self):
+        """Return repr(self)."""
+
+    def __str__(self):
+        """Return str(self)."""
+
+    @property
+    def columns(self) -> list[str]:
+        """Get column names."""
+
+    def num_columns(self) -> int:
+        """Number of columns."""
+
+    def num_rows(self) -> int:
+        """Number of rows."""
+
+    def to_df(self) -> DataFrame:
+        """Convert to a pandas DataFrame."""
+
+class GqlRow(object):
+    """A single row from a GQL query result."""
+
+    def __getitem__(self, key):
+        """Return self[key]."""
+
+    def __repr__(self):
+        """Return repr(self)."""
+
+    def keys(self) -> list[str]:
+        """Column names for this row."""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert the row to a dict mapping column name to value."""
+
+    def values(self) -> list[Any]:
+        """Values for this row."""
diff --git a/python/python/raphtory/graph_gen/__init__.pyi b/python/python/raphtory/graph_gen/__init__.pyi
index 91b8aa18d6..84c2b739ef 100644
--- a/python/python/raphtory/graph_gen/__init__.pyi
+++ b/python/python/raphtory/graph_gen/__init__.pyi
@@ -20,20 +20,26 @@ from raphtory.algorithms import *
 from raphtory.vectors import *
 from raphtory.node_state import *
 from raphtory.graphql import *
+from raphtory.gql import *
 from raphtory.typing import *
 import numpy as np
 from numpy.typing import NDArray
 from datetime import datetime
+import pandas
 from pandas import DataFrame
+import pyarrow  # type: ignore[import-untyped]
 from pyarrow import DataType  # type: ignore[import-untyped]
 from os import PathLike
+from decimal import Decimal
 import networkx as nx  # type: ignore
 import pyvis  # type: ignore
 from raphtory.iterables import *

 __all__ = ["random_attachment", "ba_preferential_attachment"]

-def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None):
+def random_attachment(
+    g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None
+) -> None:
     """
     Generates a graph using the random attachment model

@@ -48,12 +54,12 @@ def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any
         seed: The seed used in rng, an array of length 32 containing ints (ints must have a max size of u8)

     Returns:
-        None
+        None:
     """

 def ba_preferential_attachment(
     g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None
-):
+) -> None:
     """
     Generates a graph using the preferential attachment model.
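# A minimal usage sketch (illustrative, not part of the patch) for the
# generators above; both are now annotated `-> None`, i.e. they extend the
# given graph in place rather than returning a new one.
from raphtory import Graph
from raphtory.graph_gen import ba_preferential_attachment, random_attachment

g = Graph()
random_attachment(g, nodes_to_add=100, edges_per_step=2)
ba_preferential_attachment(g, nodes_to_add=50, edges_per_step=3)
print(g.count_nodes(), g.count_edges())  # g was mutated; nothing is returned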
@@ -75,6 +81,5 @@ def ba_preferential_attachment( seed: The seed used in rng, an array of length 32 containing ints (ints must have a max size of u8) Returns: - - None + None: """ diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index 16af5328ff..e5901cb17f 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -20,13 +20,17 @@ from raphtory.algorithms import * from raphtory.vectors import * from raphtory.node_state import * from raphtory.graphql import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * @@ -70,7 +74,7 @@ def lotr_graph_with_props() -> Graph: """ def neo4j_movie_graph( - uri: str, username: str, password: str, database: str = ... + uri: str, username: str, password: str, database: str = "neo4j" ) -> Graph: """ Returns the neo4j movie graph example. @@ -79,7 +83,7 @@ def neo4j_movie_graph( uri (str): username (str): password (str): - database (str): + database (str): Neo4j database name. Defaults to "neo4j". Returns: Graph: diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index 698ea7e234..b5c21e2146 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -15,13 +15,17 @@ import raphtory.filter as filter from raphtory.algorithms import * from raphtory.vectors import * from raphtory.node_state import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * @@ -44,6 +48,7 @@ __all__ = [ "decode_graph", "schema", "cli", + "has_permissions_extension", ] class GraphServer(object): @@ -56,13 +61,26 @@ class GraphServer(object): cache_tti_seconds (int, optional): the inactive time in seconds after which a graph is evicted from the cache log_level (str, optional): the log level for the server tracing (bool, optional): whether tracing should be enabled + tracing_level (str, optional): tracing verbosity (e.g. "ERROR", "WARN", "INFO", "DEBUG", "TRACE"). 
otlp_agent_host (str, optional): OTLP agent host for tracing otlp_agent_port(str, optional): OTLP agent port for tracing otlp_tracing_service_name (str, optional): The OTLP tracing service name config_path (str | PathLike, optional): Path to the config file - auth_public_key: - auth_enabled_for_reads: - create_index: + auth_public_key (str, optional): Base64-encoded public key used to verify bearer tokens + require_auth_for_reads (bool, optional): Require auth tokens for read queries + create_index (bool, optional): Build a search index on startup + heavy_query_limit (int, optional): Maximum number of expensive traversal queries (outComponent, inComponent, edges, outEdges, inEdges, neighbours, outNeighbours, inNeighbours) allowed to run simultaneously. Extra queries are parked on a semaphore. + exclusive_writes (bool, optional): If True, ingestion/write operations run one at a time and block reads until complete. + disable_batching (bool, optional): If True, batched GraphQL requests are rejected. Prevents bypassing per-request depth/complexity limits. + max_batch_size (int, optional): Caps the number of queries accepted in a single batched request. + disable_lists (bool, optional): If True, bulk `list` endpoints on collections are disabled. Clients must use `page` instead. + max_page_size (int, optional): Maximum page size allowed on paged collection queries. + max_query_depth (int, optional): Maximum nesting depth of a query. + max_query_complexity (int, optional): Maximum estimated cost of a query, based on the number of fields selected. + max_recursive_depth (int, optional): Internal safety limit to prevent stack overflows from pathologically structured queries (async-graphql default is 32). + max_directives_per_field (int, optional): Maximum number of directives on any single field. + disable_introspection (bool, optional): If True, schema introspection is disabled entirely. + permissions_store_path (str | PathLike, optional): Path to the permissions store (used by the optional auth extension). """ def __new__( @@ -72,14 +90,26 @@ class GraphServer(object): cache_tti_seconds: Optional[int] = None, log_level: Optional[str] = None, tracing: Optional[bool] = None, - tracing_level=None, + tracing_level: Optional[str] = None, otlp_agent_host: Optional[str] = None, otlp_agent_port: Optional[str] = None, otlp_tracing_service_name: Optional[str] = None, - auth_public_key: Any = None, - auth_enabled_for_reads: Any = None, + auth_public_key: Optional[str] = None, + require_auth_for_reads: Optional[bool] = None, config_path: Optional[str | PathLike] = None, - create_index: Any = None, + create_index: Optional[bool] = None, + heavy_query_limit: Optional[int] = None, + exclusive_writes: Optional[bool] = None, + disable_batching: Optional[bool] = None, + max_batch_size: Optional[int] = None, + disable_lists: Optional[bool] = None, + max_page_size: Optional[int] = None, + max_query_depth: Optional[int] = None, + max_query_complexity: Optional[int] = None, + max_recursive_depth: Optional[int] = None, + max_directives_per_field: Optional[int] = None, + disable_introspection: Optional[bool] = None, + permissions_store_path: Optional[str | PathLike] = None, ) -> GraphServer: """Create and return a new object. See help(type) for accurate signature.""" @@ -109,15 +139,20 @@ class GraphServer(object): RunningGraphServer: The running server """ - def turn_off_index(self): - """Turn off index for all graphs""" + def turn_off_index(self) -> None: + """ + Turn off index for all graphs. 
+ + Returns: + None: + """ def vectorise_all_graphs( self, embeddings: OpenAIEmbeddings, nodes: bool | str = True, edges: bool | str = True, - ): + ) -> None: """ Vectorise all graphs in the server working directory. @@ -125,6 +160,9 @@ class GraphServer(object): embeddings (OpenAIEmbeddings): the embeddings to use nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided. Defaults to True. edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided. Defaults to True. + + Returns: + None: """ def vectorise_graph( @@ -133,7 +171,7 @@ class GraphServer(object): embeddings: OpenAIEmbeddings, nodes: bool | str = True, edges: bool | str = True, - ): + ) -> None: """ Vectorise the graph name in the server working directory. @@ -142,6 +180,9 @@ class GraphServer(object): embeddings (OpenAIEmbeddings): the embeddings to use nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided. Defaults to True. edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided. Defaults to True. + + Returns: + None: """ class RunningGraphServer(object): @@ -149,20 +190,20 @@ class RunningGraphServer(object): def __enter__(self): ... def __exit__(self, _exc_type, _exc_val, _exc_tb): ... - def get_client(self): + def get_client(self) -> RaphtoryClient: """ - Get the client for the server + Get the client for the server. Returns: - RaphtoryClient: the client + RaphtoryClient: the client. """ - def stop(self): + def stop(self) -> None: """ - Stop the server and wait for it to finish + Stop the server and wait for it to finish. Returns: - None: + None: """ class RaphtoryClient(object): @@ -189,14 +230,16 @@ class RaphtoryClient(object): None: """ - def create_index(self, path: Any, index_spec, in_ram: bool = True) -> None: + def create_index( + self, path: str, index_spec: RemoteIndexSpec, in_ram: bool = True + ) -> None: """ Create Index for graph on the server at 'path' Arguments: - path: the path of the graph to be created - RemoteIndexSpec (RemoteIndexSpec): spec specifying the properties that need to be indexed - in_ram (bool): create index in ram + path (str): the path of the graph to be created + index_spec (RemoteIndexSpec): spec specifying the properties that need to be indexed + in_ram (bool): create index in ram. Defaults to True. Returns: None: @@ -266,7 +309,7 @@ class RaphtoryClient(object): Receive graph from a path path on the server Note: - This downloads a copy of the graph. Modifications are not persistet to the server. + This downloads a copy of the graph. Modifications are not persisted to the server. 
Arguments: path (str): the path of the graph to be received @@ -741,34 +784,48 @@ class AllPropertySpec(object): def __repr__(self): """Return repr(self).""" -def encode_graph(graph): +def encode_graph(graph: Graph | PersistentGraph) -> str: """ Encode a graph using Base64 encoding Arguments: - graph (Graph | PersistentGraph): the graph + graph (Graph | PersistentGraph): the graph Returns: - str: the encoded graph + str: the encoded graph """ -def decode_graph(graph): +def decode_graph(graph: str) -> Union[Graph, PersistentGraph]: """ Decode a Base64-encoded graph Arguments: - graph (str): the encoded graph + graph (str): the encoded graph Returns: - Union[Graph, PersistentGraph]: the decoded graph + Union[Graph, PersistentGraph]: the decoded graph """ -def schema(): +def schema() -> str: """ Returns the raphtory graphql server schema - Returns - str: Graphql schema + Returns: + str: Graphql schema + """ + +def cli() -> None: + """ + Run the Raphtory GraphQL CLI from Python. Uses `sys.argv` for arguments. + + Returns: + None: """ -def cli(): ... +def has_permissions_extension() -> bool: + """ + Returns True if the permissions extension (raphtory-auth) is compiled in. + + Returns: + bool: True if the extension is built in, False otherwise. + """ diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index 89fce51348..982ed7cc2c 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -16,13 +16,17 @@ from raphtory.algorithms import * from raphtory.vectors import * from raphtory.node_state import * from raphtory.graphql import * +from raphtory.gql import * from raphtory.typing import * import numpy as np from numpy.typing import NDArray from datetime import datetime +import pandas from pandas import DataFrame +import pyarrow # type: ignore[import-untyped] from pyarrow import DataType # type: ignore[import-untyped] from os import PathLike +from decimal import Decimal import networkx as nx # type: ignore import pyvis # type: ignore @@ -75,6 +79,16 @@ __all__ = [ "NestedResultUtcDateTimeIterable", "MetadataListList", "PyNestedPropsIterable", + "PyPropValueListList", + "PyTemporalPropsList", + "PyTemporalPropsListList", + "PyPropHistValueList", + "PyPropHistValueListList", + "PyTemporalPropList", + "PyTemporalPropListList", + "PyPropHistItemsList", + "PyPropHistItemsListList", + "PropIterable", ] class NestedUtcDateTimeIterable(object): @@ -105,7 +119,13 @@ class NestedUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedGIDIterable(object): def __eq__(self, value): @@ -135,9 +155,29 @@ class NestedGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ class GIDIterable(object): def __eq__(self, value): @@ -167,9 +207,29 @@ class GIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... 
+ def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ class StringIterable(object): def __eq__(self, value): @@ -199,7 +259,13 @@ class StringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class OptionArcStringIterable(object): def __eq__(self, value): @@ -229,7 +295,13 @@ class OptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class UsizeIterable(object): def __eq__(self, value): @@ -259,11 +331,45 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def mean(self) -> float: + """ + Mean of all values in the iterable. + + Returns: + float: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def sum(self) -> Any: + """ + Sum of all values in the iterable. + + Returns: + Any: + """ class OptionI64Iterable(object): def __eq__(self, value): @@ -293,9 +399,29 @@ class OptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ class NestedOptionArcStringIterable(object): def __eq__(self, value): @@ -325,7 +451,13 @@ class NestedOptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedStringIterable(object): def __eq__(self, value): @@ -355,7 +487,13 @@ class NestedStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedOptionI64Iterable(object): def __eq__(self, value): @@ -385,9 +523,29 @@ class NestedOptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). 
+ + Returns: + Any: + """ class NestedI64VecIterable(object): def __eq__(self, value): @@ -417,7 +575,13 @@ class NestedI64VecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedUsizeIterable(object): def __eq__(self, value): @@ -447,11 +611,45 @@ class NestedUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def mean(self) -> Any: + """ + Per-row mean of values (one entry per outer row). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ + + def sum(self) -> Any: + """ + Per-row sum of values (one entry per outer row). + + Returns: + Any: + """ class BoolIterable(object): def __eq__(self, value): @@ -481,7 +679,13 @@ class BoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class ArcStringIterable(object): def __iter__(self): @@ -493,7 +697,13 @@ class ArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class NestedVecUtcDateTimeIterable(object): def __eq__(self, value): @@ -523,7 +733,13 @@ class NestedVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class OptionVecUtcDateTimeIterable(object): def __eq__(self, value): @@ -553,7 +769,13 @@ class OptionVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class GIDGIDIterable(object): def __eq__(self, value): @@ -583,9 +805,29 @@ class GIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ class NestedGIDGIDIterable(object): def __eq__(self, value): @@ -615,9 +857,29 @@ class NestedGIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). 
+ + Returns: + Any: + """ class NestedBoolIterable(object): def __eq__(self, value): @@ -647,7 +909,13 @@ class NestedBoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class U64Iterable(object): def __eq__(self, value): @@ -677,11 +945,45 @@ class U64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def mean(self) -> float: + """ + Mean of all values in the iterable. + + Returns: + float: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def sum(self) -> Any: + """ + Sum of all values in the iterable. + + Returns: + Any: + """ class OptionUtcDateTimeIterable(object): def __eq__(self, value): @@ -711,7 +1013,13 @@ class OptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class ArcStringVecIterable(object): def __eq__(self, value): @@ -741,7 +1049,13 @@ class ArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ class NestedArcStringVecIterable(object): def __eq__(self, value): @@ -771,7 +1085,13 @@ class NestedArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedEventTimeIterable(object): def __eq__(self, value): @@ -801,7 +1121,14 @@ class NestedEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + @property def dt(self) -> NestedResultUtcDateTimeIterable: """ @@ -823,8 +1150,22 @@ class NestedEventTimeIterable(object): NestedUsizeIterable: Nested iterable of event ids associated to each EventTime. """ - def max(self): ... - def min(self): ... + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ + @property def t(self) -> NestedI64Iterable: """ @@ -844,7 +1185,13 @@ class NestedArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ class NestedOptionEventTimeIterable(object): def __eq__(self, value): @@ -874,7 +1221,14 @@ class NestedOptionEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. 
+ + Returns: + list[list]: + """ + @property def dt(self) -> NestedResultOptionUtcDateTimeIterable: """ @@ -896,8 +1250,22 @@ class NestedOptionEventTimeIterable(object): NestedOptionUsizeIterable: Nested iterable of event ids associated to each EventTime, if available. """ - def max(self): ... - def min(self): ... + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ + @property def t(self) -> NestedOptionI64Iterable: """ @@ -997,7 +1365,14 @@ class EventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + @property def dt(self) -> ResultUtcDateTimeIterable: """ @@ -1019,18 +1394,32 @@ class EventTimeIterable(object): UsizeIterable: Iterable of event ids associated to each EventTime. """ - def max(self): ... - def min(self): ... - @property - def t(self) -> I64Iterable: + def max(self) -> Any: """ - Change this Iterable of EventTime into an Iterable of corresponding Unix timestamps in milliseconds. + Maximum value in the iterable, or `None` if empty. Returns: - I64Iterable: Iterable of millisecond timestamps since the Unix epoch for each EventTime. + Any: """ -class OptionEventTimeIterable(object): + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + @property + def t(self) -> I64Iterable: + """ + Change this Iterable of EventTime into an Iterable of corresponding Unix timestamps in milliseconds. + + Returns: + I64Iterable: Iterable of millisecond timestamps since the Unix epoch for each EventTime. + """ + +class OptionEventTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -1058,7 +1447,14 @@ class OptionEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + @property def dt(self) -> ResultOptionUtcDateTimeIterable: """ @@ -1080,8 +1476,22 @@ class OptionEventTimeIterable(object): OptionUsizeIterable: Iterable of event ids associated to each EventTime, if available. """ - def max(self): ... - def min(self): ... + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + @property def t(self) -> OptionI64Iterable: """ @@ -1280,9 +1690,29 @@ class OptionUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ class ResultOptionUtcDateTimeIterable(object): def __iter__(self): @@ -1294,7 +1724,13 @@ class ResultOptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[Optional[datetime]]: + """ + Materialise the iterable as a list of optional datetimes, raising if any element produced an error. 
+ + Returns: + list[Optional[datetime]]: one entry per element (None where absent). + """ class I64Iterable(object): def __eq__(self, value): @@ -1324,11 +1760,45 @@ class I64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self) -> list: + """ + Materialise the iterable as a Python list. + + Returns: + list: + """ + + def max(self) -> Any: + """ + Maximum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def mean(self) -> float: + """ + Mean of all values in the iterable. + + Returns: + float: + """ + + def min(self) -> Any: + """ + Minimum value in the iterable, or `None` if empty. + + Returns: + Any: + """ + + def sum(self) -> Any: + """ + Sum of all values in the iterable. + + Returns: + Any: + """ class ResultUtcDateTimeIterable(object): def __iter__(self): @@ -1340,7 +1810,13 @@ class ResultUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[datetime]: + """ + Materialise the iterable as a list of datetimes, raising if any element produced an error. + + Returns: + list[datetime]: one datetime per element. + """ class NestedHistoryTimestampIterable(object): def __iter__(self): @@ -1528,9 +2004,29 @@ class NestedOptionUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ class NestedResultOptionUtcDateTimeIterable(object): def __iter__(self): @@ -1542,7 +2038,13 @@ class NestedResultOptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list[Optional[datetime]]]: + """ + Materialise the iterable as a nested list of optional datetimes, raising if any element produced an error. + + Returns: + list[list[Optional[datetime]]]: one inner list per outer element (entries are None where absent). + """ class NestedI64Iterable(object): def __eq__(self, value): @@ -1572,11 +2074,45 @@ class NestedI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self) -> list[list]: + """ + Materialise the nested iterable as a list of lists. + + Returns: + list[list]: + """ + + def max(self) -> Any: + """ + Per-row maximum value (or None for empty rows). + + Returns: + Any: + """ + + def mean(self) -> Any: + """ + Per-row mean of values (one entry per outer row). + + Returns: + Any: + """ + + def min(self) -> Any: + """ + Per-row minimum value (or None for empty rows). + + Returns: + Any: + """ + + def sum(self) -> Any: + """ + Per-row sum of values (one entry per outer row). + + Returns: + Any: + """ class NestedResultUtcDateTimeIterable(object): def __iter__(self): @@ -1588,7 +2124,13 @@ class NestedResultUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self) -> list[list[datetime]]: + """ + Materialise the iterable as a nested list of datetimes, raising if any element produced an error. 
+
+        Returns:
+            list[list[datetime]]: one inner list per outer element.
+        """

class MetadataListList(object):
    def __contains__(self, key):
        """Return bool(key in self)."""

@@ -1618,11 +2160,48 @@ class MetadataListList(object):
    def __ne__(self, value):
        """Return self!=value."""

-    def as_dict(self): ...
-    def get(self, key): ...
-    def items(self): ...
-    def keys(self): ...
-    def values(self): ...
+    def as_dict(self) -> dict[str, list]:
+        """
+        Materialise as a dict mapping each key to a list of value lists.
+
+        Returns:
+            dict[str, list]:
+        """
+
+    def get(self, key: str) -> Optional[PyPropValueListList]:
+        """
+        Look up the metadata for `key` across all entities.
+
+        Arguments:
+            key (str): metadata key.
+
+        Returns:
+            Optional[PyPropValueListList]:
+        """
+
+    def items(self) -> list[tuple[str, PyPropValueListList]]:
+        """
+        Pairs of `(key, value list-of-lists)` for every metadata key.
+
+        Returns:
+            list[tuple[str, PyPropValueListList]]:
+        """
+
+    def keys(self) -> list[str]:
+        """
+        Metadata keys present across the underlying entities.
+
+        Returns:
+            list[str]:
+        """
+
+    def values(self) -> list[PyPropValueListList]:
+        """
+        Per-key list of value lists.
+
+        Returns:
+            list[PyPropValueListList]:
+        """

class PyNestedPropsIterable(object):
    def __contains__(self, key):
        """Return bool(key in self)."""

@@ -1660,7 +2239,7 @@ class PyNestedPropsIterable(object):
            dict[str, List[List[PropValue]]]:
        """

-    def get(self, key: str):
+    def get(self, key: str) -> Optional[PyPropValueListList]:
        """
        Get property value.

@@ -1668,7 +2247,7 @@ class PyNestedPropsIterable(object):
            key (str): the name of the property.

        Returns:
-            PyPropValueListList:
+            Optional[PyPropValueListList]:
        """

    def items(self) -> list[Tuple[str, List[PropValue]]]:

@@ -1679,21 +2258,21 @@ class PyNestedPropsIterable(object):
            list[Tuple[str, List[PropValue]]]:
        """

-    def keys(self):
+    def keys(self) -> list[str]:
        """
        Get the names for all properties.

        Returns:
-            List[Str]:
+            list[str]:
        """

    @property
-    def temporal(self):
+    def temporal(self) -> list[list[TemporalProperty]]:
        """
        Get a view of the temporal properties only.

        Returns:
-            List[List[temporalprop]]:
+            list[list[TemporalProperty]]:
        """

    def values(self) -> list[list[list[PropValue]]]:

@@ -1704,3 +2283,793 @@ class PyNestedPropsIterable(object):
        Returns:
            list[list[list[PropValue]]]:
        """
+
+class PyPropValueListList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def average(self) -> list[PropValue]:
+        """
+        Compute the average of all property values. Alias for mean().
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def collect(self) -> list[list]:
+        """
+        Materialise the nested iterable as a list of lists.
+
+        Returns:
+            list[list]:
+        """
+
+    def count(self) -> UsizeIterable:
+        """
+        Number of properties (or rows of properties).
+
+        Returns:
+            UsizeIterable:
+        """
+
+    def drop_none(self) -> PyPropValueListList:
+        """
+        Drop missing entries from each row.
+
+        Returns:
+            PyPropValueListList:
+        """
+
+    def flatten(self) -> PyPropValueList:
+        """
+        Flatten the nested iterable into a single list of values.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def max(self) -> list[PropValue]:
+        """
+        Find the maximum property value and its associated time.
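+
+        Illustrative example (assumed data): for rows `[[1, 3], [2]]` the result
+        is `[3, 2]`, one maximum per row.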
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def mean(self) -> PyPropValueList:
+        """
+        Mean property value across each row.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def median(self) -> PyPropValueList:
+        """
+        Median property value across each row.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def min(self) -> list[PropValue]:
+        """
+        Min property value.
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def sum(self) -> list[PropValue]:
+        """
+        Sum of property values.
+
+        Returns:
+            list[PropValue]:
+        """
+
+class PyTemporalPropsList(object):
+    def __contains__(self, key):
+        """Return bool(key in self)."""
+
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __getitem__(self, key):
+        """Return self[key]."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def get(self, key: str) -> Optional[PyTemporalPropList]:
+        """
+        Look up a temporal property by key.
+
+        Arguments:
+            key (str): property key.
+
+        Returns:
+            Optional[PyTemporalPropList]:
+        """
+
+    def histories(self) -> dict[str, PyPropHistItemsList]:
+        """
+        Full update history of each property across the underlying entities.
+
+        Returns:
+            dict[str, PyPropHistItemsList]:
+        """
+
+    def items(self) -> list[tuple[str, PyTemporalPropList]]:
+        """
+        Pairs of `(key, temporal property list)` for every property key.
+
+        Returns:
+            list[tuple[str, PyTemporalPropList]]:
+        """
+
+    def keys(self) -> list[str]:
+        """
+        Property keys present across the underlying entities.
+
+        Returns:
+            list[str]:
+        """
+
+    def latest(self) -> dict[str, PyPropValueList]:
+        """
+        Latest value of each property across the underlying entities.
+
+        Returns:
+            dict[str, PyPropValueList]:
+        """
+
+    def values(self) -> list[PyTemporalPropList]:
+        """
+        Per-key list of temporal property views.
+
+        Returns:
+            list[PyTemporalPropList]:
+        """
+
+class PyTemporalPropsListList(object):
+    def __contains__(self, key):
+        """Return bool(key in self)."""
+
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __getitem__(self, key):
+        """Return self[key]."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def get(self, key: str) -> Optional[PyTemporalPropListList]:
+        """
+        Look up a nested temporal property by key.
+
+        Arguments:
+            key (str): property key.
+
+        Returns:
+            Optional[PyTemporalPropListList]:
+        """
+
+    def histories(self) -> dict[str, PyPropHistItemsListList]:
+        """
+        Full update history of each property across the nested entities.
+
+        Returns:
+            dict[str, PyPropHistItemsListList]:
+        """
+
+    def items(self) -> list[tuple[str, PyTemporalPropListList]]:
+        """
+        Pairs of `(key, nested temporal property list)` for every property key.
+
+        Returns:
+            list[tuple[str, PyTemporalPropListList]]:
+        """
+
+    def keys(self) -> list[str]:
+        """
+        Property keys present across the underlying entities.
+
+        Returns:
+            list[str]:
+        """
+
+    def latest(self) -> dict[str, PyPropValueListList]:
+        """
+        Latest value of each property across the nested entities.
+
+        Returns:
+            dict[str, PyPropValueListList]:
+        """
+
+    def values(self) -> list[PyTemporalPropListList]:
+        """
+        Per-key list of nested temporal property views.
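+
+        Illustratively, this pairs with `keys()` so that
+        `dict(zip(props.keys(), props.values()))` matches `dict(props.items())`
+        (`props` is an assumed instance of this class).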
+
+        Returns:
+            list[PyTemporalPropListList]:
+        """
+
+class PyPropHistValueList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def average(self) -> list[PropValue]:
+        """
+        Compute the average of all property values. Alias for mean().
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def collect(self) -> list:
+        """
+        Materialise the iterable as a Python list.
+
+        Returns:
+            list:
+        """
+
+    def count(self) -> UsizeIterable:
+        """
+        Number of properties (or rows of properties).
+
+        Returns:
+            UsizeIterable:
+        """
+
+    def flatten(self) -> PropIterable:
+        """
+        Flatten the per-row history values into a single iterable of values.
+
+        Returns:
+            PropIterable:
+        """
+
+    def max(self) -> list[PropValue]:
+        """
+        Find the maximum property value and its associated time.
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def mean(self) -> list[PropValue]:
+        """
+        Compute the mean of all property values.
+
+        Returns:
+            list[PropValue]: The mean of each property's values, or None if count is zero.
+        """
+
+    def median(self) -> PyPropValueList:
+        """
+        Median property value of each row.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def min(self) -> list[PropValue]:
+        """
+        Min property value.
+
+        Returns:
+            list[PropValue]:
+        """
+
+    def sum(self) -> list[PropValue]:
+        """
+        Sum of property values.
+
+        Returns:
+            list[PropValue]:
+        """
+
+class PyPropHistValueListList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def collect(self) -> list[list]:
+        """
+        Materialise the nested iterable as a list of lists.
+
+        Returns:
+            list[list]:
+        """
+
+    def count(self) -> NestedUsizeIterable:
+        """
+        Number of properties (or rows of properties).
+
+        Returns:
+            NestedUsizeIterable:
+        """
+
+    def flatten(self) -> PyPropHistValueList:
+        """
+        Flatten the nested history-values list to a single history-values list.
+
+        Returns:
+            PyPropHistValueList:
+        """
+
+    def max(self) -> list[list[PropValue]]:
+        """
+        Find the maximum property value and its associated time.
+
+        Returns:
+            list[list[PropValue]]:
+        """
+
+    def mean(self) -> PyPropValueListList:
+        """
+        Mean property value across each row.
+
+        Returns:
+            PyPropValueListList:
+        """
+
+    def median(self) -> list[list[PropValue]]:
+        """
+        Median property value of each row.
+
+        Returns:
+            list[list[PropValue]]:
+        """
+
+    def min(self) -> list[list[PropValue]]:
+        """
+        Min property value.
+
+        Returns:
+            list[list[PropValue]]:
+        """
+
+    def sum(self) -> list[list[PropValue]]:
+        """
+        Sum of property values.
+
+        Returns:
+            list[list[PropValue]]:
+        """
+
+class PyTemporalPropList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def at(self, t: TimeInput) -> PyPropValueList:
+        """
+        Value of each entity's property at the given time (latest update at or before `t`).
+
+        Arguments:
+            t (TimeInput): the time at which to evaluate the property.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def collect(self) -> list:
+        """
+        Materialise the iterable as a Python list.
+
+        Returns:
+            list:
+        """
+
+    @property
+    def history(self) -> HistoryIterable:
+        """
+        Update history (one history per underlying entity).
+
+        Returns:
+            HistoryIterable:
+        """
+
+    def items(self) -> PyPropHistItemsList:
+        """
+        Per-entity list of `(time, value)` pairs across each entity's history.
+
+        Returns:
+            PyPropHistItemsList:
+        """
+
+    def value(self) -> PyPropValueList:
+        """
+        Latest value of each entity's property.
+
+        Returns:
+            PyPropValueList:
+        """
+
+    def values(self) -> PyPropHistValueList:
+        """
+        Per-entity list of property values across each entity's history.
+
+        Returns:
+            PyPropHistValueList:
+        """
+
+class PyTemporalPropListList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def at(self, t: TimeInput) -> PyPropValueListList:
+        """
+        Value of each inner entity's property at the given time.
+
+        Arguments:
+            t (TimeInput): the time at which to evaluate the property.
+
+        Returns:
+            PyPropValueListList:
+        """
+
+    def collect(self) -> list[list]:
+        """
+        Materialise the nested iterable as a list of lists.
+
+        Returns:
+            list[list]:
+        """
+
+    def flatten(self) -> PyTemporalPropList:
+        """
+        Flatten the nested temporal property list to a single list of temporal properties.
+
+        Returns:
+            PyTemporalPropList:
+        """
+
+    @property
+    def history(self) -> NestedHistoryIterable:
+        """
+        Update history (per outer entity, per inner entity).
+
+        Returns:
+            NestedHistoryIterable:
+        """
+
+    def items(self) -> PyPropHistItemsListList:
+        """
+        Nested list of `(time, value)` pairs across each inner entity's history.
+
+        Returns:
+            PyPropHistItemsListList:
+        """
+
+    def value(self) -> PyPropValueListList:
+        """
+        Latest value of each inner entity's property.
+
+        Returns:
+            PyPropValueListList:
+        """
+
+    def values(self) -> PyPropHistValueListList:
+        """
+        Nested list of property values across each inner entity's history.
+
+        Returns:
+            PyPropHistValueListList:
+        """
+
+class PyPropHistItemsList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def collect(self) -> list:
+        """
+        Materialise the iterable as a Python list.
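+
+        Illustrative shape (assumed data): one `[(time, value), ...]` list per
+        entity, e.g. `[[(1, "a"), (3, "b")], [(2, "x")]]`.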
+
+        Returns:
+            list:
+        """
+
+class PyPropHistItemsListList(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def collect(self) -> list[list]:
+        """
+        Materialise the nested iterable as a list of lists.
+
+        Returns:
+            list[list]:
+        """
+
+class PropIterable(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    def average(self) -> PropValue:
+        """
+        Compute the average of all property values. Alias for mean().
+
+        Returns:
+            PropValue: The average of each property's values, or None if count is zero.
+        """
+
+    def collect(self) -> list:
+        """
+        Materialise the iterable as a Python list.
+
+        Returns:
+            list:
+        """
+
+    def count(self) -> int:
+        """
+        Number of properties (or rows of properties).
+
+        Returns:
+            int:
+        """
+
+    def max(self) -> PropValue:
+        """
+        Find the maximum property value and its associated time.
+
+        Returns:
+            PropValue:
+        """
+
+    def mean(self) -> PropValue:
+        """
+        Compute the mean of all property values.
+
+        Returns:
+            PropValue: The mean of each property's values, or None if count is zero.
+        """
+
+    def median(self) -> PropValue:
+        """
+        Median property value.
+
+        Returns:
+            PropValue:
+        """
+
+    def min(self) -> PropValue:
+        """
+        Min property value.
+
+        Returns:
+            PropValue:
+        """
+
+    def sum(self) -> PropValue:
+        """
+        Sum of property values.
+
+        Returns:
+            PropValue:
+        """
diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi
index 4420f2f28d..6e8c79273c 100644
--- a/python/python/raphtory/node_state/__init__.pyi
+++ b/python/python/raphtory/node_state/__init__.pyi
@@ -15,13 +15,17 @@ import raphtory.filter as filter
from raphtory.algorithms import *
from raphtory.vectors import *
from raphtory.graphql import *
+from raphtory.gql import *
from raphtory.typing import *
import numpy as np
from numpy.typing import NDArray
from datetime import datetime
+import pandas
from pandas import DataFrame
+import pyarrow  # type: ignore[import-untyped]
from pyarrow import DataType  # type: ignore[import-untyped]
from os import PathLike
+from decimal import Decimal
import networkx as nx  # type: ignore
import pyvis  # type: ignore
from raphtory.iterables import *
@@ -75,6 +79,7 @@ __all__ = [
    "NodeStateSEIR",
    "NodeLayout",
    "NodeStateF64String",
+    "OutputNodeState",
]

class NodeGroups(object):

@@ -2506,16 +2511,18 @@ class EarliestDateTimeView(object):
            NodeStateOptionDateTime: the computed `NodeState`
        """

-    def get(self, node: NodeInput, default=...) -> Optional[datetime]:
+    def get(
+        self, node: NodeInput, default: Optional[datetime] = None
+    ) -> Optional[datetime]:
        """
        Get value for node

        Arguments:
            node (NodeInput): the node
-            default (Optional[datetime]): the default value. Defaults to None.
+            default (Optional[datetime]): the default value. Defaults to None.
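+
+        Illustrative example (`view` and `n` are assumed names): a node missing
+        from the view falls back to the default, e.g.
+        `view.get(n, default=datetime(1970, 1, 1))`.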
        Returns:
-            Optional[datetime]: the value for the node or the default value
+            Optional[datetime]: the value for the node or the default value
        """

    def groups(self) -> NodeGroups:

@@ -5223,11 +5230,45 @@ class UsizeIterable(object):
    def __repr__(self):
        """Return repr(self)."""

-    def collect(self): ...
-    def max(self): ...
-    def mean(self): ...
-    def min(self): ...
-    def sum(self): ...
+    def collect(self) -> list:
+        """
+        Materialise the iterable as a Python list.
+
+        Returns:
+            list:
+        """
+
+    def max(self) -> Any:
+        """
+        Maximum value in the iterable, or `None` if empty.
+
+        Returns:
+            Any:
+        """
+
+    def mean(self) -> float:
+        """
+        Mean of all values in the iterable.
+
+        Returns:
+            float:
+        """
+
+    def min(self) -> Any:
+        """
+        Minimum value in the iterable, or `None` if empty.
+
+        Returns:
+            Any:
+        """
+
+    def sum(self) -> Any:
+        """
+        Sum of all values in the iterable.
+
+        Returns:
+            Any:
+        """

class NodeTypeView(object):
    """A lazy view over node values"""

@@ -7721,3 +7762,148 @@ class NodeStateF64String(object):
        Returns:
            Iterator[Tuple[float, str]]: Iterator over values
        """
+
+class OutputNodeState(object):
+    def __eq__(self, value):
+        """Return self==value."""
+
+    def __ge__(self, value):
+        """Return self>=value."""
+
+    def __getitem__(self, key):
+        """Return self[key]."""
+
+    def __gt__(self, value):
+        """Return self>value."""
+
+    def __iter__(self):
+        """Implement iter(self)."""
+
+    def __le__(self, value):
+        """Return self<=value."""
+
+    def __len__(self):
+        """Return len(self)."""
+
+    def __lt__(self, value):
+        """Return self<value."""
+
+    @staticmethod
+    def from_parquet(file_path: str, id_column: str = "id") -> OutputNodeState:
+        """
+        Get OutputNodeState from Parquet
+
+        Arguments:
+            file_path (str): filepath from which to read OutputNodeState
+            id_column (str): column from which node IDs are read. Defaults to "id".
+
+        Returns:
+            OutputNodeState:
+        """
+
+    def get(self, node: NodeInput, default: Optional[dict] = None) -> Optional[dict]:
+        """
+        Get value for node
+
+        Arguments:
+            node (NodeInput): the node
+            default (dict, optional): the default value (dict of field name to value). Defaults to None.
+
+        Returns:
+            Optional[dict]: the value for the node or the default value
+        """
+
+    def groups(self, cols: list[str]) -> list[tuple[dict, Nodes]]:
+        """
+        Group by value
+
+        Arguments:
+            cols (list[str]): columns by which to group nodes
+
+        Returns:
+            list[tuple[dict, Nodes]]: The grouped nodes
+        """
+
+    def items(self) -> Iterator[Tuple[Node, Dict]]:
+        """
+        Iterate over items
+
+        Returns:
+            Iterator[Tuple[Node, Dict]]: Iterator over items
+        """
+
+    def merge(
+        self,
+        other: OutputNodeState,
+        index_merge_priority: str = "left",
+        default_column_merge_priority: str = "left",
+        column_merge_priority_map: Optional[dict] = None,
+    ) -> OutputNodeState:
+        """
+        Merge with another OutputNodeState (produces new OutputNodeState)
+
+        Arguments:
+            other (OutputNodeState): OutputNodeState to merge with
+            index_merge_priority (str): "left" or "right" to take left or right index, "union" to union index sets. Defaults to "left".
+            default_column_merge_priority (str): "left" or "right" to prioritize left or right columns by default, "exclude" to exclude columns by default. Defaults to "left".
+            column_merge_priority_map (dict, optional): map of column names (str) to merge priority ("left", "right", or "exclude"). Defaults to None.
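+
+        Illustrative example (`left` and `right` are assumed states sharing a
+        "score" column): `left.merge(right, column_merge_priority_map={"score": "right"})`
+        keeps left values everywhere except "score", which is taken from the right.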
+
+        Returns:
+            OutputNodeState:
+        """
+
+    def nodes(self) -> Nodes:
+        """
+        Iterate over nodes
+
+        Returns:
+            Nodes: The nodes
+        """
+
+    def sort_by(self, sort_params: Dict) -> OutputNodeState:
+        """
+        Sort by the given sort keys
+
+        Arguments:
+            sort_params (Dict): Map of sort keys to sort option ('asc' or 'desc'). None defaults to 'asc'
+
+        Returns:
+            OutputNodeState: Sorted NodeState
+        """
+
+    def to_parquet(self, file_path: str, id_column: str = "id") -> None:
+        """
+        Convert OutputNodeState to Parquet
+
+        Arguments:
+            file_path (str): filepath to which OutputNodeState is written
+            id_column (str): column containing IDs of nodes. Defaults to "id".
+
+        Returns:
+            None:
+        """
+
+    def top_k(self, sort_params: Dict, k: int) -> OutputNodeState:
+        """
+        Get the top k entries according to the given sort keys
+
+        Arguments:
+            sort_params (Dict): Map of sort keys to sort option ('asc' or 'desc'). None defaults to 'asc'
+            k (int): Number of top entries to return.
+
+        Returns:
+            OutputNodeState: Sorted NodeState
+        """
+
+    def values(self) -> Iterator[Dict]:
+        """
+        Iterate over values
+
+        Returns:
+            Iterator[Dict]: Iterator over values (dict of field name to value)
+        """
diff --git a/python/python/raphtory/typing.py b/python/python/raphtory/typing.py
index 5da3eddb60..7d5ad41003 100644
--- a/python/python/raphtory/typing.py
+++ b/python/python/raphtory/typing.py
@@ -1,5 +1,5 @@
from datetime import datetime, date
-from typing import Union, Literal, Mapping
+from typing import Union, Literal, Mapping, Any

import raphtory

PropValue = Union[
@@ -24,3 +24,5 @@
TimeInput = Union[
    int, str, float, datetime, date, raphtory.EventTime, raphtory.OptionalEventTime
]
+
+Config = Mapping[str, Any]
diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi
index 4473546c13..a849877afc 100644
--- a/python/python/raphtory/vectors/__init__.pyi
+++ b/python/python/raphtory/vectors/__init__.pyi
@@ -15,13 +15,17 @@ import raphtory.filter as filter
from raphtory.algorithms import *
from raphtory.node_state import *
from raphtory.graphql import *
+from raphtory.gql import *
from raphtory.typing import *
import numpy as np
from numpy.typing import NDArray
from datetime import datetime
+import pandas
from pandas import DataFrame
+import pyarrow  # type: ignore[import-untyped]
from pyarrow import DataType  # type: ignore[import-untyped]
from os import PathLike
+from decimal import Decimal
import networkx as nx  # type: ignore
import pyvis  # type: ignore
from raphtory.iterables import *
@@ -33,6 +37,8 @@ __all__ = [
    "VectorSelection",
    "OpenAIEmbeddings",
    "VectorCache",
+    "EmbeddingServer",
+    "RunningEmbeddingServer",
    "embedding_server",
]

class VectorisedGraph(object):

@@ -57,8 +63,13 @@ class VectorisedGraph(object):
            VectorSelection: The vector selection resulting from the search.
        """

-    def empty_selection(self):
-        """Return an empty selection of entities."""
+    def empty_selection(self) -> VectorSelection:
+        """
+        Return an empty selection of entities.
+
+        Returns:
+            VectorSelection:
+        """

    def entities_by_similarity(
        self,
@@ -96,8 +107,13 @@ class VectorisedGraph(object):
            VectorSelection: The vector selection resulting from the search.
        """

-    def optimize_index(self):
-        """Optmize the vector index"""
+    def optimize_index(self) -> None:
+        """
+        Optimise the vector index.
+
+        Returns:
+            None:
+        """

class Document(object):
    """A document corresponding to a graph entity. Used to generate embeddings."""

@@ -136,7 +152,13 @@ class Embedding(object):
    def __repr__(self):
        """Return repr(self)."""

-    def to_arrow(self): ...
+    def to_arrow(self) -> pyarrow.Array:
+        """
+        Returns the embedding as a `pyarrow.Array` of floats.
+
+        Returns:
+            pyarrow.Array:
+        """

class VectorSelection(object):
    def add_edges(self, edges: list) -> None:

@@ -295,19 +317,89 @@ class VectorSelection(object):
        """

class OpenAIEmbeddings(object):
+    """
+    OpenAI-compatible embedding configuration. Pass an instance of this to
+    `VectorCache(...)` to drive `vectorise(...)`.
+
+    Arguments:
+        model (str): The OpenAI embedding model to use. Defaults to "text-embedding-3-small".
+        api_base (str, optional): Base URL for the OpenAI-compatible API. If None, falls back to OpenAI's default endpoint. Defaults to None.
+        api_key_env (str, optional): Environment variable name to read the API key from. If None, reads from `OPENAI_API_KEY`. Defaults to None.
+        org_id (str, optional): OpenAI organization id. If None, no org id is sent. Defaults to None.
+        project_id (str, optional): OpenAI project id. If None, no project id is sent. Defaults to None.
+        dim (int, optional): Embedding dimension override. If None, the model's native dimension is used. Defaults to None.
+    """
+
    def __new__(
        cls,
-        model="text-embedding-3-small",
-        api_base=None,
-        api_key_env=None,
-        org_id=None,
-        project_id=None,
-        dim=None,
+        model: str = "text-embedding-3-small",
+        api_base: Optional[str] = None,
+        api_key_env: Optional[str] = None,
+        org_id: Optional[str] = None,
+        project_id: Optional[str] = None,
+        dim: Optional[int] = None,
    ) -> OpenAIEmbeddings:
        """Create and return a new object. See help(type) for accurate signature."""

class VectorCache(object):
-    def __new__(cls, v_cache, cache=None) -> VectorCache:
+    """
+    Cache wrapping an embedding model. Pass to `Graph.vectorise(model=...)`
+    or other vectorisation entry points.
+
+    Arguments:
+        v_cache (OpenAIEmbeddings): Embedding model configuration.
+        cache (str, optional): Path to persist the embedding cache on disk. Defaults to None.
+    """
+
+    def __new__(
+        cls, v_cache: OpenAIEmbeddings, cache: Optional[str] = None
+    ) -> VectorCache:
        """Create and return a new object. See help(type) for accurate signature."""

-def embedding_server(function): ...
+class EmbeddingServer(object):
+    def run(self, port: int, host: Optional[str] = None) -> None:
+        """
+        Run the embedding server in the foreground until it's stopped.
+
+        Arguments:
+            port (int): Port to listen on.
+            host (str, optional): Host interface to bind to. Defaults to None.
+
+        Returns:
+            None:
+        """
+
+    def start(self, port: int, host: Optional[str] = None) -> RunningEmbeddingServer:
+        """
+        Start the embedding server in the background and return a handle.
+
+        Arguments:
+            port (int): Port to listen on.
+            host (str, optional): Host interface to bind to. Defaults to None.
+
+        Returns:
+            RunningEmbeddingServer: handle to stop the server.
+        """
+
+class RunningEmbeddingServer(object):
+    def __enter__(self): ...
+    def __exit__(self, _exc_type, _exc_val, _exc_tb): ...
+    def stop(self) -> None:
+        """
+        Stop the running embedding server.
+
+        Returns:
+            None:
+        """
+
+def embedding_server(function: Callable[[str], list[float]]) -> EmbeddingServer:
+    """
+    Wrap a Python callable so it can be served as an OpenAI-compatible
+    embedding endpoint via `EmbeddingServer.run(...)` or `EmbeddingServer.start(...)`.
+
+    Arguments:
+        function (Callable[[str], list[float]]): A callable that maps a text input to its embedding vector.
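+
+    Illustrative sketch (`my_embedding` is an assumed user function; decorator
+    usage is one way to apply this wrapper):
+
+        >>> @embedding_server
+        ... def my_embedding(text: str) -> list[float]:
+        ...     return [float(len(text))]
+        >>> handle = my_embedding.start(port=8000)  # RunningEmbeddingServer
+        >>> handle.stop()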
+ + Returns: + EmbeddingServer: + """ diff --git a/python/scripts/gen-stubs.py b/python/scripts/gen-stubs.py index 870648aec6..7b91a78b6f 100755 --- a/python/scripts/gen-stubs.py +++ b/python/scripts/gen-stubs.py @@ -10,13 +10,17 @@ "from raphtory.vectors import *", "from raphtory.node_state import *", "from raphtory.graphql import *", + "from raphtory.gql import *", "from raphtory.typing import *", "import numpy as np", "from numpy.typing import NDArray", "from datetime import datetime", + "import pandas", "from pandas import DataFrame", + "import pyarrow # type: ignore[import-untyped]", "from pyarrow import DataType # type: ignore[import-untyped]", "from os import PathLike", + "from decimal import Decimal", "import networkx as nx # type: ignore", "import pyvis # type: ignore", "from raphtory.iterables import *", diff --git a/python/src/lib.rs b/python/src/lib.rs index 7e77ec8709..b1ca6c95d9 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,3 +1,4 @@ +use clam_core::python::py_gql::base_gql_module; use pyo3::prelude::*; use raphtory::python::{ filter::base_filter_module, @@ -12,7 +13,8 @@ use raphtory_graphql::python::pymodule::base_graphql_module; /// Raphtory graph analytics library #[pymodule] fn _raphtory(py: Python<'_>, m: &Bound) -> PyResult<()> { - let _ = add_raphtory_classes(m); + auth::init(); + add_raphtory_classes(m)?; let graphql_module = base_graphql_module(py)?; let algorithm_module = base_algorithm_module(py)?; @@ -30,5 +32,9 @@ fn _raphtory(py: Python<'_>, m: &Bound) -> PyResult<()> { m.add_submodule(&node_state_module)?; m.add_submodule(&filter_module)?; m.add_submodule(&iterables)?; + + let gql_module = base_gql_module(py)?; + m.add_submodule(&gql_module)?; + Ok(()) } diff --git a/python/test_utils/utils.py b/python/test_utils/utils.py index 11aebae6f9..60d659bfcc 100644 --- a/python/test_utils/utils.py +++ b/python/test_utils/utils.py @@ -1,17 +1,16 @@ import json +import os import re import tempfile import time from datetime import datetime -from typing import TypeVar, Callable -import os -import pytest from functools import wraps +from typing import Callable, TypeVar +import pytest from dateutil import parser - -from raphtory.graphql import GraphServer from raphtory import Graph, PersistentGraph +from raphtory.graphql import GraphServer B = TypeVar("B") @@ -27,6 +26,34 @@ def sort_dict_recursive(d) -> dict: return d +def gql_sort_key(v): + if isinstance(v, dict): + direct = v.get("name", v.get("id", "")) + if direct: + return direct + # sort by src/dst for edges + src = gql_sort_key(v.get("src")) + dst = gql_sort_key(v.get("dst")) + if src: + if dst: + return [src, dst] + else: + return src + else: + return dst + else: + return "" + + +def sort_by_gql_name_or_id(d): + if isinstance(d, dict): + return {key: sort_by_gql_name_or_id(value) for key, value in d.items()} + elif isinstance(d, list): + return sorted((sort_by_gql_name_or_id(v) for v in d), key=gql_sort_key) + else: + return d + + if "DISK_TEST_MARK" in os.environ: def with_disk_graph(func): @@ -123,7 +150,7 @@ def measure(name: str, f: Callable[..., B], *args, print_result: bool = True) -> return result -def run_graphql_test(query, expected_output, graph): +def run_graphql_test(query, expected_output, graph, sort_output=False): tmp_work_dir = tempfile.mkdtemp() with GraphServer(tmp_work_dir, create_index=True).start(PORT) as server: client = server.get_client() @@ -132,12 +159,15 @@ def run_graphql_test(query, expected_output, graph): # Convert response to a dictionary if needed and compare 
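+        # When sort_output is set, both the response and the expected output are
+        # normalised with sort_by_gql_name_or_id below, so nondeterministic list
+        # ordering in GraphQL responses does not fail the comparison.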
response_dict = json.loads(response) if isinstance(response, str) else response + if sort_output: + response_dict = sort_by_gql_name_or_id(response_dict) + expected_output = sort_by_gql_name_or_id(expected_output) assert ( response_dict == expected_output ), f"left={sort_dict_recursive(response_dict)}\nright={sort_dict_recursive(expected_output)}" -def run_group_graphql_test(queries_and_expected_outputs, graph): +def run_group_graphql_test(queries_and_expected_outputs, graph, sort_output=False): tmp_work_dir = tempfile.mkdtemp() with GraphServer(tmp_work_dir, create_index=True).start(PORT) as server: client = server.get_client() @@ -148,8 +178,11 @@ def run_group_graphql_test(queries_and_expected_outputs, graph): response_dict = ( json.loads(response) if isinstance(response, str) else response ) - assert sort_dict_recursive(response_dict) == sort_dict_recursive( - expected_output + if sort_output: + response_dict = sort_by_gql_name_or_id(response_dict) + expected_output = sort_by_gql_name_or_id(expected_output) + assert ( + response_dict == expected_output ), f"Expected:\n{sort_dict_recursive(expected_output)}\nGot:\n{sort_dict_recursive(response_dict)}" @@ -232,27 +265,40 @@ def assert_set_eq(left, right): def assert_has_properties(entity, props): for k, v in props.items(): - if isinstance(v, datetime): - actual = parser.parse(entity.properties.get(k)) - assert v == actual - else: - assert entity.properties.get(k) == v + actual = entity.properties.get(k) + # Convert PyArrow arrays and other array-like objects to lists for comparison + if hasattr(actual, "to_pylist"): + actual = actual.to_pylist() + elif hasattr(actual, "tolist"): + actual = actual.tolist() + assert actual == v def assert_has_metadata(entity, props): for k, v in props.items(): - if isinstance(v, datetime): - actual = parser.parse(entity.metadata.get(k)) - assert v == actual - else: - assert entity.metadata.get(k) == v + actual = entity.metadata.get(k) + # Convert PyArrow arrays and other array-like objects to lists for comparison + if hasattr(actual, "to_pylist"): + actual = actual.to_pylist() + elif hasattr(actual, "tolist"): + actual = actual.tolist() + assert actual == v, f"Expected metadata {k!r} to be {v!r}, but got {actual!r}" def expect_unify_error(fn): - with pytest.raises(BaseException, match="Cannot unify"): + with pytest.raises(BaseException) as e: + # check the message fn() + print(e.value) + assert "Failed to unify props" in str(e.value) def assert_in_all(haystack: str, needles): for n in needles: assert n in haystack, f"expected to find {n!r} in {haystack!r}" + + +# Needed because datetimes generated using .now() have sub millisecond precision which raphtory does not support. +# Equality checks are failing because of this (in assert_has_properties and assert_has_metadata). 
+def truncate_dt_to_ms(dt: datetime) -> datetime: + return dt.replace(microsecond=(dt.microsecond // 1000) * 1000) diff --git a/python/tests/test_auth.py b/python/tests/test_auth.py index a9c733c0b8..9c6fe3d52a 100644 --- a/python/tests/test_auth.py +++ b/python/tests/test_auth.py @@ -19,16 +19,48 @@ RAPHTORY = "http://localhost:1736" -READ_JWT = jwt.encode({"a": "ro"}, PRIVATE_KEY, algorithm="EdDSA") +READ_JWT = jwt.encode({"access": "ro"}, PRIVATE_KEY, algorithm="EdDSA") READ_HEADERS = { "Authorization": f"Bearer {READ_JWT}", } -WRITE_JWT = jwt.encode({"a": "rw"}, PRIVATE_KEY, algorithm="EdDSA") +WRITE_JWT = jwt.encode({"access": "rw"}, PRIVATE_KEY, algorithm="EdDSA") WRITE_HEADERS = { "Authorization": f"Bearer {WRITE_JWT}", } +# openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out rsa-key.pem +# openssl pkey -in rsa-key.pem -pubout -outform DER | base64 | tr -d '\n' +RSA_PUB_KEY = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4sqe3DlHB/DaSm8Ab99yKj0KDc/WZGFPwXeTbPwCMKKSEc8zuSuIZc/fHXLSORn1apMnDq3aLryfPwyNTbpvhGiYVyp76XQGwSlN+EF2TsJZVAzp4/EI+bnHeHyv2Yc5q6AkFtoBPNtAz2P/18g7Yv/eZqNNSd7FOeuRFRs9y0LkswvMelQmoMOK7UKdC00AyiGksvFvljNC70VT9b0uVHggJwUYT0hdCbdaDj2fCJZBEmTqBBr97u3fIHo5T41sIEEPgE2j368mI+uk6V1saEU1BU+hkcq56TabgVqUYZTln5Rdm1MuBsNz+NQwOmVxgPNo45H2cNwTfsPDAAESlwIDAQAB" +RSA_PRIVATE_KEY = """-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDiyp7cOUcH8NpK +bwBv33IqPQoNz9ZkYU/Bd5Ns/AIwopIRzzO5K4hlz98dctI5GfVqkycOrdouvJ8/ +DI1Num+EaJhXKnvpdAbBKU34QXZOwllUDOnj8Qj5ucd4fK/ZhzmroCQW2gE820DP +Y//XyDti/95mo01J3sU565EVGz3LQuSzC8x6VCagw4rtQp0LTQDKIaSy8W+WM0Lv +RVP1vS5UeCAnBRhPSF0Jt1oOPZ8IlkESZOoEGv3u7d8gejlPjWwgQQ+ATaPfryYj +66TpXWxoRTUFT6GRyrnpNpuBWpRhlOWflF2bUy4Gw3P41DA6ZXGA82jjkfZw3BN+ +w8MAARKXAgMBAAECggEAWIH78nU2B97Syja8xGw/KUXODSreACnMDvRkKCXHkwR3 +HhUvmeXn4tf3uo3rhhZf5TpNhViK7C93tIrpAHswd0u8nFP7rNW3px3ADJE7oywM +4ZTymJ8iQhdjRd3fYPT5qEWkn/hvgDkO94EOwT8nEhFKUeMMUDZs4RhSdBrACHk0 +CrOC2S9xbgYb5OWGV6vkSqNB0k0Kv+LxU8sS46BLE7DxfpzSXDyeYaCAkk+wbwfb +hX7lysczbSl5l5Bulcf/LHL4Oa/5t+NcBZqyN6ylRXyqQ8LEdK4+TOJfvnePX1go +3rG4rtyaBCuW5JD1ytxUsyfh8WE4GinUbHWzxvaYQQKBgQD5PxF2CmqMY6yiaxU3 +0LFtRS9DtwIPnPX3Wdchq7ivSU1W6sHJjNfyEggi10DSOOINalRM/ZnVlDo8hJ3A +SybESWWzLuDZNAAAWkmoir0UpnURz847tKd8hJUivhsbdQBeKwaCuepcW6Hdwzh1 +JsJjXPovrzVGQe5FSRfBy7gswQKBgQDo78p/jEVHzuxHqSn3AsOdBdMZvPavpHb2 +Bx7tRhZOOp2QiGUHZLfjI++sQQyTu1PJqmmxOOF+eD/zkqCkLLeZsmRYOQVDOQDM +Z+u+zKYRj7KaWBeGB2Oy/WEU0pGnhyMB/T5iHmroO0Hn4gDHqkEDvwFI7SUjLNAK +1RjTxVgdVwKBgCRHNMBspbOHcoI1eeIk4x5Xepitk4Q4QWjeT7zb5MbGsZYcF1bB +xFC8pSiFEi9HDkgLmPeX1gNLTuquFtP9XEgnssDQ6vNSaUmj2qLIhtrxm4qbJ5Zz +JgmutpJW/1UQw5vxQUJX0y/cOoQvvRD4MkUKLHQyWVu/jvHQwL95anZBAoGBAIrZ +9aGWYe3uINaOth8yHJzLTgz3oS0OIoOBtyPFNaKoOihfxalklmDlmQbbN74QWl/K +H3qu52vWDnkJHI0Awujxd/NG+iYaIqm2AMcZgpzRRavPeyY/3WRiua4J3x035txW +swsWCrAoMp8hD0n16Q9smj14bzzKh7ENWeFSr7W9AoGBAMOSyRdVQxVHXagh3fAa ++FNbR8pFmQC6bQGCO74DzGe6uKYpgu+XD1yinufwwsXxjieDXCHkKTGR92Kzp5VY +Hp6HhhhCcXICRRnbxhvdpyaDbCQrT522bqRJ4rNmSVYOQQiD2vng/HVB2oWMVwa+ +fEtYNjbxjhX9qInHjHxeaNOp +-----END PRIVATE KEY-----""" + NEW_TEST_GRAPH = """mutation { newGraph(path:"test", graphType:EVENT) }""" QUERY_NAMESPACES = """query { namespaces { list{ path} } }""" @@ -54,7 +86,7 @@ def test_expired_token(): work_dir = tempfile.mkdtemp() with GraphServer(work_dir, auth_public_key=PUB_KEY).start(): exp = time() - 100 - token = jwt.encode({"a": "ro", "exp": exp}, PRIVATE_KEY, algorithm="EdDSA") + token = jwt.encode({"access": "ro", "exp": exp}, PRIVATE_KEY, algorithm="EdDSA") headers = { "Authorization": f"Bearer 
{token}", } @@ -63,7 +95,7 @@ def test_expired_token(): ) assert response.status_code == 401 - token = jwt.encode({"a": "rw", "exp": exp}, PRIVATE_KEY, algorithm="EdDSA") + token = jwt.encode({"access": "rw", "exp": exp}, PRIVATE_KEY, algorithm="EdDSA") headers = { "Authorization": f"Bearer {token}", } @@ -94,7 +126,7 @@ def test_default_read_access(query): def test_disabled_read_access(query): work_dir = tempfile.mkdtemp() with GraphServer( - work_dir, auth_public_key=PUB_KEY, auth_enabled_for_reads=False + work_dir, auth_public_key=PUB_KEY, require_auth_for_reads=False ).start(): add_test_graph() data = json.dumps({"query": query}) @@ -206,6 +238,70 @@ def test_raphtory_client(): assert g.node("test") is not None +def test_raphtory_client_write_denied_for_read_jwt(): + """RaphtoryClient initialized with a read JWT is denied write operations.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, auth_public_key=PUB_KEY).start(): + client = RaphtoryClient(url=RAPHTORY, token=READ_JWT) + with pytest.raises(Exception, match="requires write access"): + client.new_graph("test", "EVENT") + + +# --- RSA JWT support --- + + +def test_rsa_signed_jwt_rs256_accepted(): + """Server configured with an RSA public key accepts RS256-signed JWTs.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, auth_public_key=RSA_PUB_KEY).start(): + token = jwt.encode({"access": "ro"}, RSA_PRIVATE_KEY, algorithm="RS256") + response = requests.post( + RAPHTORY, + headers={"Authorization": f"Bearer {token}"}, + data=json.dumps({"query": QUERY_ROOT}), + ) + assert_successful_response(response) + + +def test_rsa_signed_jwt_rs512_accepted(): + """RS512 JWT is also accepted for the same RSA key (different hash, same key material).""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, auth_public_key=RSA_PUB_KEY).start(): + token = jwt.encode({"access": "ro"}, RSA_PRIVATE_KEY, algorithm="RS512") + response = requests.post( + RAPHTORY, + headers={"Authorization": f"Bearer {token}"}, + data=json.dumps({"query": QUERY_ROOT}), + ) + assert_successful_response(response) + + +def test_eddsa_jwt_rejected_against_rsa_key(): + """EdDSA JWT is rejected when the server is configured with an RSA public key.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, auth_public_key=RSA_PUB_KEY).start(): + token = jwt.encode({"access": "ro"}, PRIVATE_KEY, algorithm="EdDSA") + response = requests.post( + RAPHTORY, + headers={"Authorization": f"Bearer {token}"}, + data=json.dumps({"query": QUERY_ROOT}), + ) + assert response.status_code == 401 + + +def test_raphtory_client_read_jwt_can_receive_graph(): + """RaphtoryClient initialized with a read JWT can download graphs.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, auth_public_key=PUB_KEY).start(): + client = RaphtoryClient(url=RAPHTORY, token=WRITE_JWT) + client.new_graph("test", "EVENT") + client.remote_graph("test").add_node(0, "mynode") + + client2 = RaphtoryClient(url=RAPHTORY, token=READ_JWT) + g = client2.receive_graph("test") + assert g.node("mynode") is not None + + def test_upload_graph(): work_dir = tempfile.mkdtemp() with GraphServer(work_dir, auth_public_key=PUB_KEY).start(): diff --git a/python/tests/test_base_install/test_filters/test_edge_property_filter.py b/python/tests/test_base_install/test_filters/test_edge_property_filter.py index 87b650c9ee..77333e51c7 100644 --- a/python/tests/test_base_install/test_filters/test_edge_property_filter.py +++ b/python/tests/test_base_install/test_filters/test_edge_property_filter.py 
@@ -1236,42 +1236,54 @@ def check(graph): def test_nested_edges_getitem_property_filter_expr(): def check(graph): filter_expr = filter.Edge.property("p2") > 5 - result_ids = graph.nodes.edges[filter_expr].id.collect() - expected_ids = [ - [("2", "1"), ("3", "1")], - [("2", "1")], - [("3", "1"), ("3", "4")], - [("3", "4")], - [("David Gilmour", "John Mayer")], - [("David Gilmour", "John Mayer"), ("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip(graph.nodes.id, (sorted(v) for v in graph.nodes.edges[filter_expr].id)) + ) + expected_ids = { + "1": [("2", "1"), ("3", "1")], + "2": [("2", "1")], + "3": [("3", "1"), ("3", "4")], + "4": [("3", "4")], + "David Gilmour": [("David Gilmour", "John Mayer")], + "John Mayer": [ + ("David Gilmour", "John Mayer"), + ("John Mayer", "Jimmy Page"), + ], + "Jimmy Page": [("John Mayer", "Jimmy Page")], + } assert result_ids == expected_ids filter_expr2 = filter.Edge.property("p20") == "Gold_ship" - result_ids = graph.nodes.edges[filter_expr][filter_expr2].id.collect() - expected_ids = [ - [], - [], - [], - [], - [], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip( + graph.nodes.id, + (sorted(v) for v in graph.nodes.edges[filter_expr][filter_expr2].id), + ) + ) + expected_ids = { + "1": [], + "2": [], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [("John Mayer", "Jimmy Page")], + "Jimmy Page": [("John Mayer", "Jimmy Page")], + } assert result_ids == expected_ids filter_expr3 = filter_expr & filter_expr2 - result_ids = graph.nodes.edges[filter_expr3].id.collect() - expected_ids = [ - [], - [], - [], - [], - [], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip(graph.nodes.id, (sorted(v) for v in graph.nodes.edges[filter_expr3].id)) + ) + expected_ids = { + "1": [], + "2": [], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [("John Mayer", "Jimmy Page")], + "Jimmy Page": [("John Mayer", "Jimmy Page")], + } assert result_ids == expected_ids return check @@ -1284,26 +1296,81 @@ def check(graph): filter_expr2 = filter.ExplodedEdge.property("p2") == 4 # Test 1 - result_ids = graph.nodes.edges[filter_expr].explode()[filter_expr2].id.collect() - expected_ids = [[("1", "2")], [("1", "2")], [], [], [], [], []] + result_ids = dict( + zip( + graph.nodes.id, + ( + sorted(v) + for v in graph.nodes.edges[filter_expr].explode()[filter_expr2].id + ), + ) + ) + expected_ids = { + "1": [("1", "2")], + "2": [("1", "2")], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids - result_ids = graph.nodes.edges[filter_expr].explode()[filter_expr2].id.collect() - expected_ids = [[("1", "2")], [("1", "2")], [], [], [], [], []] + result_ids = dict( + zip( + graph.nodes.id, + ( + sorted(v) + for v in graph.nodes.edges[filter_expr].explode()[filter_expr2].id + ), + ) + ) + expected_ids = { + "1": [("1", "2")], + "2": [("1", "2")], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids # Test 2 filter_expr = filter.ExplodedEdge.property("p20") == "Gold_ship" filter_expr2 = filter.ExplodedEdge.property("p2") == 4 - result_ids = graph.nodes.edges.explode()[filter_expr][filter_expr2].id.collect() - expected_ids = [[("1", "2")], [("1", "2")], [], [], [], [], []] + result_ids = dict( + zip( + graph.nodes.id, + graph.nodes.edges.explode()[filter_expr][filter_expr2].id.collect(), + ) + ) + 
expected_ids = { + "1": [("1", "2")], + "2": [("1", "2")], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids filter_expr = filter.ExplodedEdge.property("p20") == "Gold_ship" filter_expr2 = filter.ExplodedEdge.property("p2") == 4 filter_expr3 = filter_expr & filter_expr2 - result_ids = graph.nodes.edges.explode()[filter_expr3].id.collect() - expected_ids = [[("1", "2")], [("1", "2")], [], [], [], [], []] + result_ids = dict( + zip(graph.nodes.id, graph.nodes.edges.explode()[filter_expr3].id.collect()) + ) + expected_ids = { + "1": [("1", "2")], + "2": [("1", "2")], + "3": [], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids return check @@ -1313,44 +1380,63 @@ def check(graph): def test_nodes_nested_edges_getitem_property_filter_expr(): def check(graph): filter_expr = filter.Edge.property("p2") > 5 - result_ids = graph.nodes.neighbours.edges[filter_expr].id.collect() - expected_ids = [ - [("2", "1"), ("3", "1"), ("3", "4")], - [("2", "1"), ("3", "1"), ("3", "1"), ("3", "4")], - [("2", "1"), ("3", "1"), ("2", "1"), ("3", "4")], - [("3", "1"), ("3", "4")], - [("David Gilmour", "John Mayer"), ("John Mayer", "Jimmy Page")], - [("David Gilmour", "John Mayer"), ("John Mayer", "Jimmy Page")], - [("David Gilmour", "John Mayer"), ("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip( + graph.nodes.id, + (sorted(v) for v in graph.nodes.neighbours.edges[filter_expr].id), + ) + ) + expected_ids = { + "1": [("2", "1"), ("3", "1"), ("3", "4")], + "2": [("2", "1"), ("3", "1"), ("3", "1"), ("3", "4")], + "3": [("2", "1"), ("2", "1"), ("3", "1"), ("3", "4")], + "4": [("3", "1"), ("3", "4")], + "David Gilmour": [ + ("David Gilmour", "John Mayer"), + ("John Mayer", "Jimmy Page"), + ], + "John Mayer": [ + ("David Gilmour", "John Mayer"), + ("John Mayer", "Jimmy Page"), + ], + "Jimmy Page": [ + ("David Gilmour", "John Mayer"), + ("John Mayer", "Jimmy Page"), + ], + } assert result_ids == expected_ids filter_expr2 = filter.Edge.property("p20") == "Gold_ship" - result_ids = graph.nodes.neighbours.edges[filter_expr][ - filter_expr2 - ].id.collect() - expected_ids = [ - [], - [], - [], - [], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip( + graph.nodes.id, + graph.nodes.neighbours.edges[filter_expr][filter_expr2].id.collect(), + ) + ) + expected_ids = { + "1": [], + "2": [], + "3": [], + "4": [], + "David Gilmour": [("John Mayer", "Jimmy Page")], + "John Mayer": [("John Mayer", "Jimmy Page")], + "Jimmy Page": [("John Mayer", "Jimmy Page")], + } assert result_ids == expected_ids filter_expr3 = filter_expr & filter_expr2 - result_ids = graph.nodes.neighbours.edges[filter_expr3].id.collect() - expected_ids = [ - [], - [], - [], - [], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - [("John Mayer", "Jimmy Page")], - ] + result_ids = dict( + zip(graph.nodes.id, graph.nodes.neighbours.edges[filter_expr3].id.collect()) + ) + expected_ids = { + "1": [], + "2": [], + "3": [], + "4": [], + "David Gilmour": [("John Mayer", "Jimmy Page")], + "John Mayer": [("John Mayer", "Jimmy Page")], + "Jimmy Page": [("John Mayer", "Jimmy Page")], + } assert result_ids == expected_ids return check diff --git a/python/tests/test_base_install/test_filters/test_exploded_edge_filter.py b/python/tests/test_base_install/test_filters/test_exploded_edge_filter.py index 977196b47b..03860e8c3a 100644 --- 
a/python/tests/test_base_install/test_filters/test_exploded_edge_filter.py +++ b/python/tests/test_base_install/test_filters/test_exploded_edge_filter.py @@ -824,7 +824,7 @@ def test_all_property_types(GraphClass): with pytest.raises(Exception) as e: filter.ExplodedEdge.property("name").fuzzy_search(2, 2, False) - assert "'int' object cannot be converted to 'PyString'" in str(e.value) + assert "argument 'prop_value': 'int' object cannot be cast as 'str'" in str(e.value) missing_prop = [ (filter.ExplodedEdge.property("blah") == 2), diff --git a/python/tests/test_base_install/test_filters/test_node_filter.py b/python/tests/test_base_install/test_filters/test_node_filter.py index 464ca867bf..e95c238493 100644 --- a/python/tests/test_base_install/test_filters/test_node_filter.py +++ b/python/tests/test_base_install/test_filters/test_node_filter.py @@ -436,21 +436,14 @@ def test_filter_nodes_by_column(): graph.add_node(1, 4, {}) graph.add_node(1, 5, {}) + expected = {i: {"bool_col": v % 2 != 0} for (v, i) in enumerate(graph.nodes.id)} actual = alternating_mask(graph) - expected = { - 1: {"bool_col": False}, - 2: {"bool_col": True}, - 3: {"bool_col": False}, - 4: {"bool_col": True}, - 5: {"bool_col": False}, - } assert actual == expected filter_expr = filter.Node.by_state_column(actual, "bool_col") result_ids = sorted(graph.filter(filter_expr).nodes.id) - expected_ids = sorted([2, 4]) + expected_ids = sorted(i for i, v in expected.items() if v["bool_col"]) assert result_ids == expected_ids result_ids = sorted(graph.nodes[filter_expr].id) - expected_ids = sorted([2, 4]) assert result_ids == expected_ids diff --git a/python/tests/test_base_install/test_filters/test_node_property_filter.py b/python/tests/test_base_install/test_filters/test_node_property_filter.py index ef41cdcf46..41824dbd5c 100644 --- a/python/tests/test_base_install/test_filters/test_node_property_filter.py +++ b/python/tests/test_base_install/test_filters/test_node_property_filter.py @@ -867,26 +867,39 @@ def check(graph): assert result_ids == expected_ids filter_expr = filter.Node.property("p100") > 30 - result_ids = sorted(graph.nodes[filter_expr].neighbours.name.collect()) - expected_ids = [["1", "2", "4"], ["2", "3"]] + result_ids = dict( + zip( + graph.nodes[filter_expr].id, + (sorted(v) for v in graph.nodes[filter_expr].neighbours.name), + ) + ) + expected_ids = {"1": ["2", "3"], "3": ["1", "2", "4"]} assert result_ids == expected_ids filter_expr = filter.Node.property("p100") > 30 - result_ids = sorted(graph.filter(filter_expr).nodes.neighbours.name.collect()) - expected_ids = [ - ["1"], - ["3"], - ] # graph filter applies to nodes neighbours as well + result_ids = dict( + zip( + graph.filter(filter_expr).nodes.id, + graph.filter(filter_expr).nodes.neighbours.name.collect(), + ) + ) + expected_ids = { + "3": ["1"], + "1": ["3"], + } # graph filter applies to nodes neighbours as well assert result_ids == expected_ids filter_expr = filter.Node.property("p100") > 30 - result_ids = sorted(graph.nodes[filter_expr].degree()) - expected_ids = [2, 3] + result_ids = graph.nodes[filter_expr].degree() + expected_ids = {"1": 2, "3": 3} assert result_ids == expected_ids filter_expr = filter.Node.property("p100") > 30 - result_ids = sorted(graph.filter(filter_expr).nodes.degree()) - expected_ids = [1, 1] # graph filter applies to nodes neighbours as well + result_ids = graph.filter(filter_expr).nodes.degree() + expected_ids = { + "1": 1, + "3": 1, + } # graph filter applies to nodes neighbours as well assert result_ids == expected_ids 
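+        # Results are keyed by node id because node iteration order is not
+        # guaranteed; comparing plain sorted lists would lose the
+        # node-to-value pairing.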
# Test 2 @@ -910,47 +923,84 @@ def check(graph): filter_expr = filter.Node.property("p100") > 30 # Test 1 - result_ids = graph.nodes.id.collect() - expected_ids = ["1", "2", "3", "4", "David Gilmour", "John Mayer", "Jimmy Page"] - assert result_ids == expected_ids - - result_ids = graph.nodes.neighbours.id.collect() - expected_ids = [ - ["2", "3"], - ["1", "3"], - ["1", "2", "4"], - ["3"], - ["John Mayer"], - ["David Gilmour", "Jimmy Page"], - ["John Mayer"], - ] - assert result_ids == expected_ids - - result_ids = graph.nodes.neighbours[filter_expr].id.collect() - expected_ids = [["3"], ["1", "3"], ["1"], ["3"], [], [], []] - assert result_ids == expected_ids - - result_ids = graph.nodes.neighbours[filter_expr].neighbours.id.collect() - expected_ids = [ - ["1", "2", "4"], - ["2", "3", "1", "2", "4"], - ["2", "3"], - ["1", "2", "4"], - [], - [], - [], - ] + node_ids = graph.nodes.id.collect() + expected_ids = ["1", "2", "3", "4", "David Gilmour", "Jimmy Page", "John Mayer"] + assert sorted(node_ids) == expected_ids + + result_ids = dict(zip(node_ids, (sorted(v) for v in graph.nodes.neighbours.id))) + expected_ids = { + "1": ["2", "3"], + "2": ["1", "3"], + "3": ["1", "2", "4"], + "4": ["3"], + "David Gilmour": ["John Mayer"], + "John Mayer": ["David Gilmour", "Jimmy Page"], + "Jimmy Page": ["John Mayer"], + } + assert result_ids == expected_ids + + result_ids = dict( + zip(node_ids, (sorted(v) for v in graph.nodes.neighbours[filter_expr].id)) + ) + expected_ids = { + "1": ["3"], + "2": ["1", "3"], + "3": ["1"], + "4": ["3"], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } + assert result_ids == expected_ids + + result_ids = dict( + zip( + node_ids, + (sorted(v) for v in graph.nodes.neighbours[filter_expr].neighbours.id), + ) + ) + expected_ids = { + "1": ["1", "2", "4"], + "2": ["1", "2", "2", "3", "4"], + "3": ["2", "3"], + "4": ["1", "2", "4"], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids # Test 2 filter_expr2 = filter.Node.property("p9") == 5 - result_ids = graph.nodes.neighbours[filter_expr][filter_expr2].id.collect() - expected_ids = [[], ["1"], ["1"], [], [], [], []] + result_ids = dict( + zip( + node_ids, graph.nodes.neighbours[filter_expr][filter_expr2].id.collect() + ) + ) + expected_ids = { + "1": [], + "2": ["1"], + "3": ["1"], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids filter_expr3 = filter_expr & filter_expr2 - result_ids = graph.nodes.neighbours[filter_expr3].id.collect() - expected_ids = [[], ["1"], ["1"], [], [], [], []] + result_ids = dict( + zip(node_ids, graph.nodes.neighbours[filter_expr3].id.collect()) + ) + expected_ids = { + "1": [], + "2": ["1"], + "3": ["1"], + "4": [], + "David Gilmour": [], + "John Mayer": [], + "Jimmy Page": [], + } assert result_ids == expected_ids return check @@ -963,7 +1013,7 @@ def check(graph): assert graph.node("1") is not None # Test 1 - result_ids = graph.node("1").neighbours.id.collect() + result_ids = sorted(graph.node("1").neighbours.id) expected_ids = ["2", "3"] assert result_ids == expected_ids @@ -971,7 +1021,7 @@ def check(graph): expected_ids = ["3"] assert result_ids == expected_ids - result_ids = graph.node("1").neighbours[filter_expr].neighbours.id.collect() + result_ids = sorted(graph.node("1").neighbours[filter_expr].neighbours.id) expected_ids = ["1", "2", "4"] assert result_ids == expected_ids diff --git a/python/tests/test_base_install/test_graphdb/test_algorithms.py 
b/python/tests/test_base_install/test_graphdb/test_algorithms.py index e9736c713e..ac80a5d138 100644 --- a/python/tests/test_base_install/test_graphdb/test_algorithms.py +++ b/python/tests/test_base_install/test_graphdb/test_algorithms.py @@ -1,8 +1,8 @@ import pytest - -from raphtory import Graph -from raphtory import algorithms -from raphtory import graph_loader +from raphtory import Graph, algorithms, graph_loader +from numpy.linalg import norm +import math +import numpy as np def gen_graph(): @@ -43,7 +43,6 @@ def test_local_clustering_coefficient(): assert actual == expected expected = {k: {"lcc": v} for k, v in expected.items()} actual = algorithms.local_clustering_coefficient_batch(g, list(range(1, 9))) - # actual = {str(i): actual[i]["lcc"] for i in range(1, 9)} assert actual == expected @@ -390,8 +389,7 @@ def test_degree_centrality(): def test_max_min_degree(): from raphtory import Graph - from raphtory.algorithms import max_degree - from raphtory.algorithms import min_degree + from raphtory.algorithms import max_degree, min_degree g = Graph() g.add_edge(0, 0, 1, {}) @@ -506,10 +504,14 @@ def test_betweenness_centrality(): def test_hits_algorithm(): g = graph_loader.lotr_graph() - assert algorithms.hits(g).get("Aldor") == { + expected = { "hub_score": 0.0035840950440615416, "auth_score": 0.007476256228983402, } + actual = algorithms.hits(g).get("Aldor") + assert all( + math.isclose(actual[key], e, rel_tol=1e-6) for key, e in expected.items() + ) def test_balance_algorithm(): @@ -635,7 +637,7 @@ def test_temporal_SEIR(): ########## -def test_nodestate_merge_test(): +def test_nodestate_merge(): from raphtory.algorithms import degree_centrality, pagerank g = Graph() @@ -717,170 +719,41 @@ def test_max_weight_matching(): assert max_weight.dst(3) is None +@pytest.mark.skip(reason="Probability test - to be investigated") def test_fast_rp(): g = Graph() edges = [ (1, 2, 1), (1, 3, 1), (2, 3, 1), + (3, 1, 1), + (2, 1, 1), (4, 5, 1), (4, 6, 1), (4, 7, 1), (5, 6, 1), (5, 7, 1), (6, 7, 1), + (7, 5, 1), (6, 8, 1), ] for src, dst, ts in edges: g.add_edge(ts, src, dst) result = algorithms.fast_rp(g, 16, 1.0, [1.0, 1.0], 42) - baseline = { - 1: [ - 1.6817928305074292, - 0.4204482076268573, - -0.4204482076268573, - 0.0, - 0.0, - 2.1022410381342866, - 0.4204482076268573, - 0.4204482076268573, - 2.1022410381342866, - -0.8408964152537146, - 0.0, - 1.6817928305074292, - 0.0, - -1.6817928305074292, - 0.0, - -0.8408964152537146, - ], - 2: [ - 0.4204482076268573, - 1.6817928305074292, - -1.6817928305074292, - 0.0, - 0.0, - 0.8408964152537146, - 1.6817928305074292, - 1.6817928305074292, - 2.1022410381342866, - -2.1022410381342866, - 0.0, - 0.4204482076268573, - 0.0, - -0.4204482076268573, - 0.0, - -2.1022410381342866, - ], - 3: [ - 0.4204482076268573, - 0.4204482076268573, - -0.4204482076268573, - 0.0, - 0.0, - 2.1022410381342866, - 0.4204482076268573, - 0.4204482076268573, - 0.8408964152537146, - -2.1022410381342866, - 0.0, - 0.4204482076268573, - 0.0, - -0.4204482076268573, - 0.0, - -2.1022410381342866, - ], - 4: [ - -1.4014940254228576, - 0.560597610169143, - 1.121195220338286, - -0.2802988050845715, - 0.2802988050845715, - -0.2802988050845715, - 0.2802988050845715, - 0.0, - -1.6817928305074292, - 0.0, - 0.0, - -0.2802988050845715, - 0.2802988050845715, - 0.2802988050845715, - -0.2802988050845715, - -1.6817928305074292, - ], - 5: [ - 0.0, - 1.9620916355920008, - -1.6817928305074292, - -1.6817928305074292, - 0.2802988050845715, - -0.2802988050845715, - 0.2802988050845715, - 1.4014940254228576, - 
-0.2802988050845715,
-            0.0,
-            0.0,
-            -1.6817928305074292,
-            0.2802988050845715,
-            0.2802988050845715,
-            -0.2802988050845715,
-            1.121195220338286,
-        ],
-        6: [
-            -0.21022410381342865,
-            0.6306723114402859,
-            -1.6817928305074292,
-            -1.4715687266940005,
-            1.6817928305074292,
-            -1.6817928305074292,
-            0.0,
-            -1.4715687266940005,
-            -0.21022410381342865,
-            0.0,
-            0.0,
-            -0.4204482076268573,
-            1.6817928305074292,
-            0.21022410381342865,
-            -0.21022410381342865,
-            -0.21022410381342865,
-        ],
-        7: [
-            1.4014940254228576,
-            1.9620916355920008,
-            -0.2802988050845715,
-            1.121195220338286,
-            0.2802988050845715,
-            -0.2802988050845715,
-            1.6817928305074292,
-            0.0,
-            -0.2802988050845715,
-            0.0,
-            0.0,
-            -0.2802988050845715,
-            0.2802988050845715,
-            1.6817928305074292,
-            -1.6817928305074292,
-            -1.6817928305074292,
-        ],
-        8: [
-            -1.6817928305074292,
-            1.6817928305074292,
-            -0.8408964152537146,
-            0.8408964152537146,
-            0.8408964152537146,
-            -0.8408964152537146,
-            -1.6817928305074292,
-            -0.8408964152537146,
-            0.0,
-            0.0,
-            0.0,
-            -1.6817928305074292,
-            0.8408964152537146,
-            0.0,
-            0.0,
-            0.0,
-        ],
-    }
-    result = {n.id: v["embedding_state"] for n, v in result.items()}
-    assert result == baseline
+
+    group_1 = [1, 2, 3]
+    group_2 = [4, 5, 6, 7]
+
+    d1 = max(
+        norm(np.array(result[i]) - np.array(result[j]))
+        for i in group_1
+        for j in group_1
+    )
+    d2 = min(
+        norm(np.array(result[i]) - np.array(result[j]))
+        for i in group_1
+        for j in group_2
+    )
+    assert d1 < d2
diff --git a/python/tests/test_base_install/test_graphdb/test_event_time.py b/python/tests/test_base_install/test_graphdb/test_event_time.py
index fef6a73e5e..27d2833bef 100644
--- a/python/tests/test_base_install/test_graphdb/test_event_time.py
+++ b/python/tests/test_base_install/test_graphdb/test_event_time.py
@@ -98,13 +98,13 @@ def test_time_input_parsing(example_graph):
         assert gw.nodes == [1], f"Unexpected nodes for end={end!r}"

     assert g.window(86400000, 88200000).nodes == [2]
-    assert g.window(86400000, 88200001).nodes == [2, 3]
+    assert g.window(86400000, 88200001).nodes.id.sorted_by_id() == [2, 3]
     gw = g.window(88200000, "2000-01-01")
     assert gw.nodes == [3]
     gw = g.window(88200000, "2000-01-01 00:00:01")
-    assert gw.nodes == [3, 4]
+    assert gw.nodes.id.sorted_by_id() == [3, 4]
     gw = g.window(88200000, "2000-01-02")
-    assert gw.nodes == [3, 4]
+    assert gw.nodes.id.sorted_by_id() == [3, 4]


 def test_optional_event_time_none_comparison():
diff --git a/python/tests/test_base_install/test_graphdb/test_graphdb.py b/python/tests/test_base_install/test_graphdb/test_graphdb.py
index 2c8811d200..525cc9db6b 100644
--- a/python/tests/test_base_install/test_graphdb/test_graphdb.py
+++ b/python/tests/test_base_install/test_graphdb/test_graphdb.py
@@ -1,28 +1,27 @@
 from __future__ import unicode_literals
-from decimal import Decimal
+
 import math
-import sys
+import os
+import pickle
 import random
 import re
-
-import pandas as pd
-import pandas.core.frame
-import pytest
-import pyarrow as pa
-from raphtory import Graph, PersistentGraph
-from raphtory import algorithms
-from raphtory import graph_loader
+import shutil
+import string
+import sys
 import tempfile
+from decimal import Decimal
 from math import isclose
 from datetime import date, datetime, timezone
-import string
 from pathlib import Path
-from pytest import fixture
-from numpy.testing import assert_equal as check_arr
-import os
-import shutil
+
 import numpy as np
-import pickle
+import pandas as pd
+import pandas.core.frame
+import pyarrow as pa
+import pytest
+from numpy.testing import assert_equal as
check_arr +from pytest import fixture +from raphtory import Graph, PersistentGraph, algorithms, graph_loader from utils import with_disk_graph base_dir = Path(__file__).parent @@ -256,29 +256,19 @@ def test_windowed_graph_edges(): def check(g): view = g.window(0, sys.maxsize) - tedges = [v.edges for v in view.nodes] - edges = [] - for e_iter in tedges: - for e in e_iter: - edges.append([e.src.id, e.dst.id]) - - assert edges == [[1, 1], [1, 2], [1, 3], [1, 2], [3, 2], [1, 3], [3, 2]] - - tedges = [v.in_edges for v in view.nodes] - in_edges = [] - for e_iter in tedges: - for e in e_iter: - in_edges.append([e.src.id, e.dst.id]) + edges = {v.id: sorted(v.edges.id) for v in view.nodes} + assert edges == { + 1: [(1, 1), (1, 2), (1, 3)], + 2: [(1, 2), (3, 2)], + 3: [(1, 3), (3, 2)], + } - assert in_edges == [[1, 1], [1, 2], [3, 2], [1, 3]] + in_edges = {v.id: sorted(v.in_edges.id) for v in view.nodes} + assert in_edges == {1: [(1, 1)], 2: [(1, 2), (3, 2)], 3: [(1, 3)]} - tedges = [v.out_edges for v in view.nodes] - out_edges = [] - for e_iter in tedges: - for e in e_iter: - out_edges.append([e.src.id, e.dst.id]) + out_edges = {v.id: sorted(v.out_edges.id) for v in view.nodes} - assert out_edges == [[1, 1], [1, 2], [1, 3], [3, 2]] + assert out_edges == {1: [(1, 1), (1, 2), (1, 3)], 2: [], 3: [(3, 2)]} check(g) @@ -321,14 +311,20 @@ def check(g): view = g.window(min_size, max_size) - neighbours = view.nodes.neighbours.id.collect() - assert neighbours == [[1, 2, 3], [1, 3], [1, 2]] + neighbours = dict( + zip(view.nodes.id, (sorted(v) for v in view.nodes.neighbours.id)) + ) + assert neighbours == {1: [1, 2, 3], 2: [1, 3], 3: [1, 2]} - in_neighbours = view.nodes.in_neighbours.id.collect() - assert in_neighbours == [[1, 2], [1, 3], [1]] + in_neighbours = dict( + zip(view.nodes.id, (sorted(v) for v in view.nodes.in_neighbours.id)) + ) + assert in_neighbours == {1: [1, 2], 2: [1, 3], 3: [1]} - out_neighbours = view.nodes.out_neighbours.id.collect() - assert out_neighbours == [[1, 2, 3], [1], [2]] + out_neighbours = dict( + zip(view.nodes.id, (sorted(v) for v in view.nodes.out_neighbours.id)) + ) + assert out_neighbours == {1: [1, 2, 3], 2: [1], 3: [2]} check(g) @@ -1257,7 +1253,7 @@ def test_save_missing_dir(): g = create_graph() tmpdirname = tempfile.TemporaryDirectory() inner_folder = "".join(random.choice(string.ascii_letters) for _ in range(10)) - graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph.bin" + graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph" with pytest.raises(Exception): g.save_to_file(graph_path) @@ -1687,46 +1683,39 @@ def check(g): def test_edge_history(): + expected_history = {(1, 2): [1, 3], (1, 3): [2], (1, 4): [4]} g = Graph() + for (src, dst), timestamps in expected_history.items(): + for t in timestamps: + g.add_edge(t, src, dst) - g.add_edge(1, 1, 2) - g.add_edge(2, 1, 3) - g.add_edge(3, 1, 2) - g.add_edge(4, 1, 4) - - @with_disk_graph - def check(g): - view = g.window(1, 5) - view2 = g.window(1, 4) - - check_arr(g.edge(1, 2).history.t.collect(), [1, 3]) - check_arr(view.edge(1, 4).history.t.collect(), [4]) - check_arr(g.edges.history.t.collect(), [[1, 3], [2], [4]]) - check_arr(view2.edges.history.t.collect(), [[1, 3], [2]]) + view = g.window(1, 5) + view2 = g.window(1, 4) - old_way = [] - for e in g.edges: - old_way.append(e.history.collect()) - check_arr(g.edges.history.collect(), old_way) + check_arr(g.edge(1, 2).history.t.collect(), expected_history[(1, 2)]) + check_arr(view.edge(1, 4).history.t.collect(), expected_history[(1, 4)]) + 
check_arr(g.edges.history.t.collect(), [expected_history[e] for e in g.edges.id]) + assert sorted(view2.edges.id) == [(1, 2), (1, 3)] + check_arr( + view2.edges.history.t.collect(), [expected_history[e] for e in view2.edges.id] + ) - check_arr( - g.nodes.edges.history.t.collect(), - [ - [[1, 3], [2], [4]], - [[1, 3]], - [[2]], - [[4]], - ], - ) + old_way = [] + for e in g.edges: + old_way.append(e.history.collect()) + check_arr(g.edges.history.collect(), old_way) - old_way2 = [] - for edges in g.nodes.edges: - for edge in edges: - old_way2.append(edge.history.collect()) - new_way = g.nodes.edges.history.collect() - check_arr([np.array(item) for sublist in new_way for item in sublist], old_way2) + res = g.nodes.edges.history.t.collect() + for node, v in zip(g.nodes, res): + for e, vv in zip(node.edges.id, v): + check_arr(vv, expected_history[e]) - check(g) + old_way2 = [] + for edges in g.nodes.edges: + for edge in edges: + old_way2.append(edge.history.collect()) + new_way = g.nodes.edges.history.collect() + check_arr([np.array(item) for sublist in new_way for item in sublist], old_way2) def test_lotr_edge_history(): @@ -2230,7 +2219,7 @@ def test_exclude_nodes(): @with_disk_graph def check(g): exclude_nodes = g.exclude_nodes([1]) - assert exclude_nodes.nodes.id.collect() == [2, 3] + assert sorted(exclude_nodes.nodes.id.collect()) == [2, 3] check(g) @@ -2274,9 +2263,11 @@ def check_g_inner(mg): assert mg.node(4).metadata.get("abc") == "xyz" check_arr(mg.node(1).history.t.collect(), [-1, 0, 0, 1, 1, 2]) check_arr(mg.node(4).history.t.collect(), [6, 8]) - assert mg.nodes.id.collect() == [1, 2, 3, 4] + assert len(mg.nodes.id.collect()) == 4 + assert set(mg.nodes.id.collect()) == {1, 3, 2, 4} assert set(mg.edges.id) == {(1, 1), (1, 2), (1, 3), (2, 1), (3, 2), (2, 4)} - assert g.nodes.id.collect() == mg.nodes.id.collect() + assert len(g.nodes.id.collect()) == len(mg.nodes.id.collect()) + assert set(g.nodes.id.collect()) == set(mg.nodes.id.collect()) assert set(g.edges.id) == set(mg.edges.id) assert mg.node(1).metadata == {} assert mg.node(4).metadata == {"abc": "xyz"} @@ -2632,12 +2623,8 @@ def test_type_filter(): g.add_node(1, 3, node_type="timer") g.add_node(1, 4, node_type="wallet") - @with_disk_graph - def check(g): - assert [node.name for node in g.nodes.type_filter(["wallet"])] == ["1", "4"] - assert g.subgraph_node_types(["timer"]).nodes.name.collect() == ["2", "3"] - - check(g) + assert sorted(node.name for node in g.nodes.type_filter(["wallet"])) == ["1", "4"] + assert g.subgraph_node_types(["timer"]).nodes.name.sorted_by_id() == ["2", "3"] g = PersistentGraph() g.add_node(1, 1, node_type="wallet") @@ -2645,23 +2632,19 @@ def check(g): g.add_node(3, 3, node_type="timer") g.add_node(4, 4, node_type="wallet") - # @with_disk_graph # FIXME PersistentGraph cannot be used with with_disk_graph - def check(g): - assert [node.name for node in g.nodes.type_filter(["wallet"])] == ["1", "4"] - assert g.subgraph_node_types(["timer"]).nodes.name.collect() == ["2", "3"] - - subgraph = g.subgraph([1, 2, 3]) - assert [node.name for node in subgraph.nodes.type_filter(["wallet"])] == ["1"] - assert subgraph.subgraph_node_types(["timer"]).nodes.name.collect() == [ - "2", - "3", - ] + assert sorted(node.name for node in g.nodes.type_filter(["wallet"])) == ["1", "4"] + assert sorted(g.subgraph_node_types(["timer"]).nodes.name.collect()) == ["2", "3"] - w = g.window(1, 4) - assert [node.name for node in w.nodes.type_filter(["wallet"])] == ["1"] - assert w.subgraph_node_types(["timer"]).nodes.name.collect() == 
["2", "3"] + subgraph = g.subgraph([1, 2, 3]) + assert [node.name for node in subgraph.nodes.type_filter(["wallet"])] == ["1"] + assert sorted(subgraph.subgraph_node_types(["timer"]).nodes.name.collect()) == [ + "2", + "3", + ] - check(g) + w = g.window(1, 4) + assert [node.name for node in w.nodes.type_filter(["wallet"])] == ["1"] + assert sorted(w.subgraph_node_types(["timer"]).nodes.name.collect()) == ["2", "3"] g = Graph() g.add_node(1, 1, node_type="wallet") @@ -2672,13 +2655,12 @@ def check(g): g.add_edge(2, 2, 3, layer="layer1") g.add_edge(3, 2, 4, layer="layer2") - @with_disk_graph - def check(g): - layer = g.layers(["layer1"]) - assert [node.name for node in layer.nodes.type_filter(["wallet"])] == ["1"] - assert layer.subgraph_node_types(["timer"]).nodes.name.collect() == ["2", "3"] - - check(g) + layer = g.layers(["layer1"]) + assert [node.name for node in layer.nodes.type_filter(["wallet"])] == ["1"] + assert sorted(layer.subgraph_node_types(["timer"]).nodes.name.collect()) == [ + "2", + "3", + ] g = Graph() g.add_node(1, 1, node_type="a") @@ -2698,84 +2680,117 @@ def check(g): g.add_edge(2, 5, 6, layer="a") g.add_edge(2, 3, 6, layer="a") - # @with_disk_graph # FIXME: add support for type_filters + layers support on edges - def check(g): - assert g.nodes.type_filter([""]).name.collect() == ["7", "8", "9"] - - assert g.nodes.type_filter(["a"]).name.collect() == ["1", "4"] - assert g.nodes.type_filter(["a", "c"]).name.collect() == ["1", "4", "5"] - assert g.nodes.type_filter(["a"]).neighbours.name.collect() == [ - ["2"], - ["2", "5"], - ] + assert sorted(g.nodes.type_filter([""]).name.collect()) == ["7", "8", "9"] - assert g.nodes.degree().collect() == [1, 3, 2, 2, 2, 2, 0, 0, 0] - assert g.nodes.type_filter(["a"]).degree().collect() == [1, 2] - assert g.nodes.type_filter(["d"]).degree().collect() == [] - assert g.nodes.type_filter([]).name.collect() == [] + assert sorted(g.nodes.type_filter(["a"]).name.collect()) == ["1", "4"] + assert sorted(g.nodes.type_filter(["a", "c"]).name.collect()) == ["1", "4", "5"] + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + (sorted(v) for v in g.nodes.type_filter(["a"]).neighbours.name), + ) + ) == { + 1: ["2"], + 4: ["2", "5"], + } - assert len(g.nodes) == 9 - assert len(g.nodes.type_filter(["b"])) == 2 - assert len(g.nodes.type_filter(["d"])) == 0 + assert g.nodes.degree() == {1: 1, 2: 3, 3: 2, 4: 2, 5: 2, 6: 2, 7: 0, 8: 0, 9: 0} + assert g.nodes.type_filter(["a"]).degree() == {1: 1, 4: 2} + assert g.nodes.type_filter(["d"]).degree().collect() == [] + assert g.nodes.type_filter([]).name.collect() == [] - assert g.nodes.type_filter(["d"]).neighbours.name.collect() == [] - assert g.nodes.type_filter(["a"]).neighbours.name.collect() == [ - ["2"], - ["2", "5"], - ] - assert g.nodes.type_filter(["a", "c"]).neighbours.name.collect() == [ - ["2"], - ["2", "5"], - ["4", "6"], - ] + assert len(g.nodes) == 9 + assert len(g.nodes.type_filter(["b"])) == 2 + assert len(g.nodes.type_filter(["d"])) == 0 - assert g.nodes.type_filter(["a"]).neighbours.type_filter( - ["c"] - ).name.collect() == [ - [], - ["5"], - ] - assert g.nodes.type_filter(["a"]).neighbours.type_filter([]).name.collect() == [ - [], - [], - ] - assert g.nodes.type_filter(["a"]).neighbours.type_filter( - ["b", "c"] - ).name.collect() == [["2"], ["2", "5"]] - assert g.nodes.type_filter(["a"]).neighbours.type_filter( - ["d"] - ).name.collect() == [ - [], - [], - ] - assert g.nodes.type_filter(["a"]).neighbours.neighbours.name.collect() == [ - ["1", "3", "4"], - ["1", "3", "4", "4", "6"], 
- ] - assert g.nodes.type_filter(["a"]).neighbours.type_filter( - ["c"] - ).neighbours.name.collect() == [[], ["4", "6"]] - assert g.nodes.type_filter(["a"]).neighbours.type_filter( - ["d"] - ).neighbours.name.collect() == [[], []] - - assert g.node("2").neighbours.type_filter(["b"]).name.collect() == ["3"] - assert g.node("2").neighbours.type_filter(["d"]).name.collect() == [] - assert g.node("2").neighbours.type_filter([]).name.collect() == [] - assert g.node("2").neighbours.type_filter(["c", "a"]).name.collect() == [ - "1", - "4", - ] - assert g.node("2").neighbours.type_filter(["c"]).neighbours.name.collect() == [] - assert g.node("2").neighbours.neighbours.name.collect() == [ - "2", - "2", - "6", - "2", - "5", - ] + assert g.nodes.type_filter(["d"]).neighbours.name.collect() == [] + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + (sorted(v) for v in g.nodes.type_filter(["a"]).neighbours.name), + ) + ) == { + 1: ["2"], + 4: ["2", "5"], + } + assert dict( + zip( + g.nodes.type_filter(["a", "c"]).id, + (sorted(v) for v in g.nodes.type_filter(["a", "c"]).neighbours.name), + ) + ) == { + 1: ["2"], + 4: ["2", "5"], + 5: ["4", "6"], + } - check(g) + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + g.nodes.type_filter(["a"]).neighbours.type_filter(["c"]).name.collect(), + ) + ) == { + 1: [], + 4: ["5"], + } + assert g.nodes.type_filter(["a"]).neighbours.type_filter([]).name.collect() == [ + [], + [], + ] + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + ( + sorted(v) + for v in g.nodes.type_filter(["a"]) + .neighbours.type_filter(["b", "c"]) + .name + ), + ) + ) == {1: ["2"], 4: ["2", "5"]} + assert g.nodes.type_filter(["a"]).neighbours.type_filter(["d"]).name.collect() == [ + [], + [], + ] + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + (sorted(v) for v in g.nodes.type_filter(["a"]).neighbours.neighbours.name), + ) + ) == { + 1: ["1", "3", "4"], + 4: ["1", "3", "4", "4", "6"], + } + assert dict( + zip( + g.nodes.type_filter(["a"]).id, + ( + sorted(v) + for v in g.nodes.type_filter(["a"]) + .neighbours.type_filter(["c"]) + .neighbours.name + ), + ) + ) == {1: [], 4: ["4", "6"]} + assert g.nodes.type_filter(["a"]).neighbours.type_filter( + ["d"] + ).neighbours.name.collect() == [[], []] + + assert g.node("2").neighbours.type_filter(["b"]).name.collect() == ["3"] + assert g.node("2").neighbours.type_filter(["d"]).name.collect() == [] + assert g.node("2").neighbours.type_filter([]).name.collect() == [] + assert sorted(g.node("2").neighbours.type_filter(["c", "a"]).name.collect()) == [ + "1", + "4", + ] + assert g.node("2").neighbours.type_filter(["c"]).neighbours.name.collect() == [] + assert sorted(g.node("2").neighbours.neighbours.name.collect()) == [ + "2", + "2", + "2", + "5", + "6", + ] def test_time_exploded_edges(): @@ -2827,24 +2842,16 @@ def test_leading_zeroes_ids(): g.add_node(0, "001") g.add_node(0, "0001") - @with_disk_graph - def check(g): - assert g.count_nodes() == 4 - assert g.nodes.name.collect() == ["1", "01", "001", "0001"] - - check(g) + assert g.count_nodes() == 4 + assert sorted(g.nodes.name.collect()) == ["0001", "001", "01", "1"] g = Graph() g.add_node(0, 0) g.add_node(1, 0) - # @with_disk_graph # FIXME: need special handling for nodes additions from Graph - def check(g): - check_arr(g.node(0).history.t.collect(), [0, 1]) - check_arr(g.node("0").history.t.collect(), [0, 1]) - assert g.nodes.name.collect() == ["0"] - - check(g) + check_arr(g.node(0).history.t.collect(), [0, 1]) + check_arr(g.node("0").history.t.collect(), [0, 1]) + assert 
g.nodes.name.collect() == ["0"] def test_node_types(): diff --git a/python/tests/test_base_install/test_graphdb/test_graphdb_imports.py b/python/tests/test_base_install/test_graphdb/test_graphdb_imports.py index c0aa8624b0..d26463370e 100644 --- a/python/tests/test_base_install/test_graphdb/test_graphdb_imports.py +++ b/python/tests/test_base_install/test_graphdb/test_graphdb_imports.py @@ -300,7 +300,7 @@ def test_import_edges(): g2 = Graph() g2.import_edges(g.edges) assert g2.count_edges() == 3 - assert g.edges.id == g2.edges.id + assert sorted(g.edges.id) == sorted(g2.edges.id) def test_import_edges_iterator(): @@ -314,4 +314,4 @@ def test_import_edges_iterator(): g2 = Graph() g2.import_edges(iter(g.edges)) assert g2.count_edges() == 3 - assert g.edges.id == g2.edges.id + assert sorted(g.edges.id) == sorted(g2.edges.id) diff --git a/python/tests/test_base_install/test_graphdb/test_latest_graph.py b/python/tests/test_base_install/test_graphdb/test_latest_graph.py index a3e63c3ec3..6262fd9119 100644 --- a/python/tests/test_base_install/test_graphdb/test_latest_graph.py +++ b/python/tests/test_base_install/test_graphdb/test_latest_graph.py @@ -117,46 +117,112 @@ def test_persistent_edge_latest(): assert wg.edge(1, 4).latest().is_active() - assert g.edges.latest().earliest_time.collect() == [6, 6, None] - assert g.edges.latest().latest_time.collect() == [6, 6, None] - - assert g.edges.latest().is_active().collect() == [False, True, False] - assert g.edges.latest().is_deleted().collect() == [False, False, True] - assert g.edges.latest().is_valid().collect() == [True, True, False] + assert dict(zip(g.edges.id, g.edges.latest().earliest_time.collect())) == { + (1, 2): 6, + (1, 3): 6, + (1, 4): None, + } + assert dict(zip(g.edges.id, g.edges.latest().latest_time.collect())) == { + (1, 2): 6, + (1, 3): 6, + (1, 4): None, + } + + assert dict(zip(g.edges.id, g.edges.latest().is_active().collect())) == { + (1, 2): False, + (1, 3): True, + (1, 4): False, + } + assert dict(zip(g.edges.id, g.edges.latest().is_deleted().collect())) == { + (1, 2): False, + (1, 3): False, + (1, 4): True, + } + assert dict(zip(g.edges.id, g.edges.latest().is_valid().collect())) == { + (1, 2): True, + (1, 3): True, + (1, 4): False, + } assert wg.edges.latest().earliest_time.collect() == [5, 5, 5] assert wg.edges.latest().latest_time.collect() == [5, 5, 5] - assert wg.edges.latest().is_active().collect() == [False, True, True] - assert wg.edges.latest().is_deleted().collect() == [False, False, False] + assert dict(zip(g.edges.id, wg.edges.latest().is_active().collect())) == { + (1, 2): False, + (1, 3): True, + (1, 4): True, + } + assert dict(zip(g.edges.id, wg.edges.latest().is_deleted().collect())) == { + (1, 2): False, + (1, 3): False, + (1, 4): False, + } assert wg.edges.latest().is_valid().collect() == [True, True, True] - assert g.nodes.edges.latest().earliest_time.collect() == [ - [6, 6, None], - [6], - [6], - [None], - ] - assert g.nodes.edges.latest().latest_time.collect() == [ - [6, 6, None], - [6], - [6], - [None], - ] - assert g.nodes.edges.latest().is_active().collect() == [ - [False, True, False], - [False], - [True], - [False], - ] - - assert wg.nodes.edges.latest().earliest_time.collect() == [[5, 5, 5], [5], [5], [5]] - assert wg.nodes.edges.latest().latest_time.collect() == [[5, 5, 5], [5], [5], [5]] - assert wg.nodes.edges.latest().is_active().collect() == [ - [False, True, True], - [False], - [True], - [True], - ] + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, 
g.nodes.edges.latest().earliest_time.collect()) + } + assert res == { + 1: {(1, 2): 6, (1, 3): 6, (1, 4): None}, + 2: {(1, 2): 6}, + 3: {(1, 3): 6}, + 4: {(1, 4): None}, + } + + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, g.nodes.edges.latest().latest_time.collect()) + } + assert res == { + 1: {(1, 2): 6, (1, 3): 6, (1, 4): None}, + 2: {(1, 2): 6}, + 3: {(1, 3): 6}, + 4: {(1, 4): None}, + } + + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, g.nodes.edges.latest().is_active().collect()) + } + assert res == { + 1: {(1, 2): False, (1, 3): True, (1, 4): False}, + 2: {(1, 2): False}, + 3: {(1, 3): True}, + 4: {(1, 4): False}, + } + + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, wg.nodes.edges.latest().earliest_time.collect()) + } + assert res == { + 1: {(1, 2): 5, (1, 3): 5, (1, 4): 5}, + 2: {(1, 2): 5}, + 3: {(1, 3): 5}, + 4: {(1, 4): 5}, + } + + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, wg.nodes.edges.latest().latest_time.collect()) + } + assert res == { + 1: {(1, 2): 5, (1, 3): 5, (1, 4): 5}, + 2: {(1, 2): 5}, + 3: {(1, 3): 5}, + 4: {(1, 4): 5}, + } + + res = { + n.id: {e: v for e, v in zip(n.edges.id, ev)} + for n, ev in zip(g.nodes, wg.nodes.edges.latest().is_active().collect()) + } + assert res == { + 1: {(1, 2): False, (1, 3): True, (1, 4): True}, + 2: {(1, 2): False}, + 3: {(1, 3): True}, + 4: {(1, 4): True}, + } def test_persistent_node_latest(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py index 858dd15f30..64abdc470e 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py @@ -21,7 +21,7 @@ def test_archive_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_archive_graph_fails_if_graph_not_found_at_namespace(): @@ -38,7 +38,7 @@ def test_archive_graph_fails_if_graph_not_found_at_namespace(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_archive_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py index 3d72683421..734e08cce9 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py @@ -20,7 +20,7 @@ def test_copy_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 +45,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 
+70,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_copy_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py index 29b7a1d2b1..7b74574344 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py @@ -18,7 +18,7 @@ def test_delete_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found(): @@ -30,8 +30,7 @@ def test_delete_graph_succeeds_if_graph_found(): g.add_edge(1, "ben", "hamza") g.add_edge(2, "haaroon", "hamza") g.add_edge(3, "ben", "haaroon") - - g.save_to_file(os.path.join(work_dir, "g1")) + client.send_graph("g1", g) query = """mutation { deleteGraph( @@ -43,7 +42,7 @@ def test_delete_graph_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_using_client_api_succeeds_if_graph_found(): @@ -62,7 +61,7 @@ def test_delete_graph_using_client_api_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found_at_namespace(): @@ -87,4 +86,4 @@ def test_delete_graph_succeeds_if_graph_found_at_namespace(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py index 740278d623..6f22bc0928 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py @@ -16,7 +16,7 @@ def test_get_graph_fails_if_graph_not_found(): query = """{ graph(path: "g1") { name, path, nodes { list { name } } } }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_get_graph_fails_if_graph_not_found_at_namespace(): @@ -29,7 +29,7 @@ def test_get_graph_fails_if_graph_not_found_at_namespace(): ) with pytest.raises(Exception) as excinfo: client.query(query) - 
assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_get_graph_succeeds_if_graph_found(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py index 3f21bdca32..9c0f624be9 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py @@ -1,18 +1,16 @@ +import json import os import tempfile - import pytest - +from utils import sort_by_gql_name_or_id +from raphtory import Graph, graph_loader from raphtory.graphql import ( GraphServer, RaphtoryClient, - encode_graph, - decode_graph, RemoteGraph, + decode_graph, + encode_graph, ) -from raphtory import graph_loader -from raphtory import Graph -import json def normalize_path(path): @@ -156,39 +154,47 @@ def assert_graph_fetch(path): path = "../shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( - excinfo.value + assert ( + "Invalid path '../shivam/g': References to the parent dir are not allowed within the path" + in str(excinfo.value) ) path = "./shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the current dir are not allowed within the path" in str( - excinfo.value + assert ( + "Invalid path './shivam/g': References to the current dir are not allowed within the path" + in str(excinfo.value) ) path = "shivam/../../../../investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( - excinfo.value + assert ( + "Invalid path 'shivam/../../../../investigation/g': References to the parent dir are not allowed within the path" + in str(excinfo.value) ) path = "//shivam/investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert ( + "Invalid path '//shivam/investigation/g': Double forward slashes are not allowed in path" + in str(excinfo.value) + ) path = "shivam/investigation//2024-12-12/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert ( + "Invalid path 'shivam/investigation//2024-12-12/g': Double forward slashes are not allowed in path" + in str(excinfo.value) + ) path = r"shivam/investigation\2024-12-12" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Backslash not allowed in path" in str(excinfo.value) - + assert r"Backslash not allowed in path" in str(excinfo.value) # Test if we can escape through a symlink tmp_dir2 = tempfile.mkdtemp() nested_dir = os.path.join(tmp_work_dir, "shivam", "graphs") @@ -199,7 +205,10 @@ def assert_graph_fetch(path): path = "shivam/graphs/not_a_symlink_i_promise/escaped" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "A component of the given path was a symlink" in str(excinfo.value) + assert ( + "Invalid path 'shivam/graphs/not_a_symlink_i_promise/escaped': A component of the given path was a symlink" + in 
str(excinfo.value) + ) def test_graph_windows_and_layers_query(): @@ -479,13 +488,11 @@ def test_create_node(): assert client.query(create_node_query) == { "updateGraph": {"createNode": {"success": True}} } - assert client.query(query_nodes) == { - "graph": { - "nodes": { - "list": [{"name": "ben"}, {"name": "shivam"}, {"name": "oogway"}] - } - } - } + nodes = sorted( + n["name"] for n in client.query(query_nodes)["graph"]["nodes"]["list"] + ) + expected_nodes = ["ben", "oogway", "shivam"] + assert nodes == expected_nodes with pytest.raises(Exception) as excinfo: client.query(create_node_query) @@ -509,13 +516,11 @@ def test_create_node_using_client(): remote_graph = client.remote_graph(path="g") remote_graph.create_node(timestamp=0, id="oogway") - assert client.query(query_nodes) == { - "graph": { - "nodes": { - "list": [{"name": "ben"}, {"name": "shivam"}, {"name": "oogway"}] - } - } - } + nodes = sorted( + n["name"] for n in client.query(query_nodes)["graph"]["nodes"]["list"] + ) + expected_nodes = ["ben", "oogway", "shivam"] + assert nodes == expected_nodes with pytest.raises(Exception) as excinfo: remote_graph.create_node(timestamp=0, id="oogway") @@ -664,30 +669,25 @@ def test_create_node_using_client_with_node_type(): client.send_graph(path="g", graph=g) query_nodes = """{graph(path: "g") {nodes {list {name, nodeType}}}}""" - assert client.query(query_nodes) == { - "graph": { - "nodes": { - "list": [ - {"name": "ben", "nodeType": None}, - {"name": "shivam", "nodeType": None}, - ] - } - } - } + + node_and_types = sorted( + client.query(query_nodes)["graph"]["nodes"]["list"], key=lambda n: n["name"] + ) + assert node_and_types == [ + {"name": "ben", "nodeType": None}, + {"name": "shivam", "nodeType": None}, + ] remote_graph = client.remote_graph(path="g") remote_graph.create_node(timestamp=0, id="oogway", node_type="master") - assert client.query(query_nodes) == { - "graph": { - "nodes": { - "list": [ - {"name": "ben", "nodeType": None}, - {"name": "shivam", "nodeType": None}, - {"name": "oogway", "nodeType": "master"}, - ] - } - } - } + node_and_types = sorted( + client.query(query_nodes)["graph"]["nodes"]["list"], key=lambda n: n["name"] + ) + assert node_and_types == [ + {"name": "ben", "nodeType": None}, + {"name": "oogway", "nodeType": "master"}, + {"name": "shivam", "nodeType": None}, + ] with pytest.raises(Exception) as excinfo: remote_graph.create_node(timestamp=0, id="oogway", node_type="master") @@ -707,7 +707,7 @@ def test_edge_id(): client.send_graph(path="g", graph=g) query_nodes = """{graph(path: "g") {edges {list {id}}}}""" - assert client.query(query_nodes) == { + assert sort_by_gql_name_or_id(client.query(query_nodes)) == { "graph": { "edges": { "list": [ @@ -720,6 +720,154 @@ def test_edge_id(): } +def test_graph_persistence_across_restarts(): + tmp_work_dir = tempfile.mkdtemp() + + # First server session: create graph with 3 nodes and 2 edges + with GraphServer(tmp_work_dir).start(port=1738): + client = RaphtoryClient("http://localhost:1738") + client.new_graph(path="persistent_graph", graph_type="EVENT") + remote_graph = client.remote_graph(path="persistent_graph") + # Create 3 nodes + remote_graph.add_node(timestamp=1, id="node1") + remote_graph.add_node(timestamp=2, id="node2") + remote_graph.add_node(timestamp=3, id="node3") + + # Create 2 edges + remote_graph.add_edge(timestamp=4, src="node1", dst="node2") + remote_graph.add_edge(timestamp=5, src="node2", dst="node3") + + # Verify initial creation + query_nodes = """{graph(path: "persistent_graph") {nodes {list 
{name}}}}""" + query_edges = """{graph(path: "persistent_graph") {edges {list {id}}}}""" + + assert sort_by_gql_name_or_id(client.query(query_nodes)) == { + "graph": { + "nodes": { + "list": [{"name": "node1"}, {"name": "node2"}, {"name": "node3"}] + } + } + } + + assert sort_by_gql_name_or_id(client.query(query_edges)) == { + "graph": { + "edges": { + "list": [ + {"id": ["node1", "node2"]}, + {"id": ["node2", "node3"]}, + ] + } + } + } + + # Server is now shutdown, start it again + with GraphServer(tmp_work_dir).start(port=1738): + client = RaphtoryClient("http://localhost:1738") + + # Verify persistence: check that nodes and edges are still there + query_nodes = """{graph(path: "persistent_graph") {nodes {sorted (sortBys: [{id: true}]){ list {name} }}}}""" + query_edges = """{graph(path: "persistent_graph") {edges {sorted (sortBys: [{src: true, dst: true}]){ list {id} }}}}""" + + assert client.query(query_nodes) == { + "graph": { + "nodes": { + "sorted": { + "list": [ + {"name": "node1"}, + {"name": "node2"}, + {"name": "node3"}, + ] + } + } + } + } + + assert client.query(query_edges) == { + "graph": { + "edges": { + "sorted": { + "list": [ + {"id": ["node1", "node2"]}, + {"id": ["node2", "node3"]}, + ] + } + } + } + } + + # Add one more node and another edge + remote_graph = client.remote_graph(path="persistent_graph") + remote_graph.add_node(timestamp=6, id="node4") + remote_graph.add_edge(timestamp=7, src="node3", dst="node4") + + # Verify the new additions + assert client.query(query_nodes) == { + "graph": { + "nodes": { + "sorted": { + "list": [ + {"name": "node1"}, + {"name": "node2"}, + {"name": "node3"}, + {"name": "node4"}, + ] + } + } + } + } + + assert client.query(query_edges) == { + "graph": { + "edges": { + "sorted": { + "list": [ + {"id": ["node1", "node2"]}, + {"id": ["node2", "node3"]}, + {"id": ["node3", "node4"]}, + ] + } + } + } + } + + +# tests for https://github.com/Pometry/Raphtory/issues/2487 +def test_float_is_stable_on_roundtrip(): + tmp_work_dir = tempfile.mkdtemp() + float_examples = [ + -1.5186248156922167e66, + -1.7177476606208664e199, + -1.048551606005279e71, + ] + prop_key = "p" + + with GraphServer(tmp_work_dir).start(port=1738): + client = RaphtoryClient("http://localhost:1738") + client.new_graph(path="g", graph_type="EVENT") + remote_graph = client.remote_graph(path="g") + + for i, num in enumerate(float_examples): + remote_graph.add_node(timestamp=i, id=i, properties={prop_key: num}) + query = f""" + query {{ + graph(path: "g") {{ + node(name: "{i}") {{ + at(time: {i}) {{ + properties {{ + get(key: "p") {{ + value + }} + }} + }} + }} + }} + }} + """ + resp = client.query(query) + retrieved_float = resp["graph"]["node"]["at"]["properties"]["get"]["value"] + assert retrieved_float == num + + # def test_disk_graph_name(): # import pandas as pd # from raphtory import DiskGraphStorage diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py index 98eb97d4bf..f72762e3d8 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py @@ -20,7 +20,7 @@ def test_move_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 
+45,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 +70,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_move_graph_succeeds(): @@ -124,7 +124,7 @@ def test_move_graph_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "g4") { nodes {list {name}} @@ -157,7 +157,7 @@ def test_move_graph_using_client_api_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "ben/g4") { nodes {list {name}} @@ -197,7 +197,7 @@ def test_move_graph_succeeds_at_same_namespace_as_graph(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -238,7 +238,7 @@ def test_move_graph_succeeds_at_diff_namespace_as_graph(): query = """{graph(path: "ben/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py index da0d3f6c9d..adba406a92 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py @@ -45,7 +45,7 @@ def test_new_graph_fails_if_graph_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'test/path/g1' already exists" in str(excinfo.value) def test_client_new_graph_works(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py index 2230da5948..20bc5ce76a 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py @@ 
-16,7 +16,7 @@ def test_receive_graph_fails_if_no_graph_found(): query = """{ receiveGraph(path: "g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found(): @@ -28,13 +28,11 @@ def test_receive_graph_succeeds_if_graph_found(): g.add_edge(1, "ben", "hamza") g.add_edge(2, "haaroon", "hamza") g.add_edge(3, "ben", "haaroon") - - g.save_to_file(os.path.join(work_dir, "g1")) - + client.send_graph("g1", g) query = """{ receiveGraph(path: "g1") }""" received_graph = client.query(query)["receiveGraph"] - decoded_bytes = base64.b64decode(received_graph) + decoded_bytes = base64.urlsafe_b64decode(received_graph) g = Graph.deserialise(decoded_bytes) assert g.nodes.name == ["ben", "hamza", "haaroon"] @@ -62,7 +60,7 @@ def test_receive_graph_fails_if_no_graph_found_at_namespace(): query = """{ receiveGraph(path: "shivam/g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found_at_namespace(): @@ -81,7 +79,7 @@ def test_receive_graph_succeeds_if_graph_found_at_namespace(): query = """{ receiveGraph(path: "shivam/g2") }""" received_graph = client.query(query)["receiveGraph"] - decoded_bytes = base64.b64decode(received_graph) + decoded_bytes = base64.urlsafe_b64decode(received_graph) g = Graph.deserialise(decoded_bytes) assert g.nodes.name == ["ben", "hamza", "haaroon"] diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py index d73703d88a..41a469f31f 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py @@ -31,7 +31,7 @@ def test_send_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="g", graph=g) - assert "Graph already exists by name = g" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -41,11 +41,12 @@ def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): g.add_edge(1, "ben", "hamza") g.add_edge(2, "haaroon", "hamza") g.add_edge(3, "ben", "haaroon") - g.save_to_file(os.path.join(tmp_work_dir, "g")) with GraphServer(tmp_work_dir).start(): client = RaphtoryClient("http://localhost:1736") + client.send_graph(path="g", graph=g) + g = Graph() g.add_edge(1, "ben", "hamza") g.add_edge(2, "haaroon", "hamza") @@ -94,7 +95,7 @@ def test_send_graph_fails_if_graph_already_exists_at_namespace(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="shivam/g", graph=g) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py index 78e7e7ac1b..5f92d5e37a 100644 --- 
a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py @@ -70,7 +70,7 @@ def test_upload_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="g", file_path=g_file_path) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -135,6 +135,8 @@ def test_upload_graph_succeeds_if_no_graph_found_with_same_name_at_namespace(): } } } + g2 = client.receive_graph("shivam/g") + assert g2.has_node("ben") def test_upload_graph_fails_if_graph_already_exists_at_namespace(): @@ -153,7 +155,7 @@ def test_upload_graph_fails_if_graph_already_exists_at_namespace(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="shivam/g", file_path=g_file_path, overwrite=False) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): diff --git a/python/tests/test_base_install/test_graphql/misc/test_latest.py b/python/tests/test_base_install/test_graphql/misc/test_latest.py index 967618667f..ee58148aed 100644 --- a/python/tests/test_base_install/test_graphql/misc/test_latest.py +++ b/python/tests/test_base_install/test_graphql/misc/test_latest.py @@ -1,3 +1,4 @@ +from utils import sort_by_gql_name_or_id from raphtory.graphql import RaphtoryClient @@ -49,6 +50,7 @@ def test_latest_and_active(): edges { latest { list { + id history { list { timestamp @@ -74,6 +76,7 @@ def test_latest_and_active(): edges { latest { list { + id history { list { timestamp @@ -114,15 +117,17 @@ def test_latest_and_active(): "latest": { "list": [ { + "id": [1, 2], "history": { "list": [{"timestamp": 3, "eventId": 2}] - } + }, }, - {"history": {"list": []}}, + {"id": [1, 3], "history": {"list": []}}, { + "id": [1, 4], "history": { "list": [{"timestamp": 3, "eventId": 5}] - } + }, }, ] } @@ -134,9 +139,10 @@ def test_latest_and_active(): "latest": { "list": [ { + "id": [1, 2], "history": { "list": [{"timestamp": 3, "eventId": 2}] - } + }, } ] } @@ -144,7 +150,11 @@ def test_latest_and_active(): }, { "name": "3", - "edges": {"latest": {"list": [{"history": {"list": []}}]}}, + "edges": { + "latest": { + "list": [{"id": [1, 3], "history": {"list": []}}] + } + }, }, { "name": "4", @@ -152,9 +162,10 @@ def test_latest_and_active(): "latest": { "list": [ { + "id": [1, 4], "history": { "list": [{"timestamp": 3, "eventId": 5}] - } + }, } ] } @@ -190,9 +201,15 @@ def test_latest_and_active(): "edges": { "latest": { "list": [ - {"history": {"list": [{"timestamp": 3, "eventId": 2}]}}, - {"history": {"list": []}}, - {"history": {"list": [{"timestamp": 3, "eventId": 5}]}}, + { + "id": [1, 2], + "history": {"list": [{"timestamp": 3, "eventId": 2}]}, + }, + {"id": [1, 3], "history": {"list": []}}, + { + "id": [1, 4], + "history": {"list": [{"timestamp": 3, "eventId": 5}]}, + }, ] } }, @@ -217,4 +234,4 @@ def test_latest_and_active(): g.save_to_file(work_dir + "/graph") with GraphServer(work_dir).start(): client = RaphtoryClient("http://localhost:1736") - assert client.query(query) == result + assert sort_by_gql_name_or_id(client.query(query)) == result diff --git 
a/python/tests/test_base_install/test_graphql/misc/test_map_props.py b/python/tests/test_base_install/test_graphql/misc/test_map_props.py index 2b040896fa..f5ca23d73a 100644 --- a/python/tests/test_base_install/test_graphql/misc/test_map_props.py +++ b/python/tests/test_base_install/test_graphql/misc/test_map_props.py @@ -13,7 +13,7 @@ def test_map_props(): work_dir = tempfile.mkdtemp() - server = server = GraphServer(work_dir) + server = GraphServer(work_dir) with server.start(): temp_dir = tempfile.mkdtemp() client = RaphtoryClient("http://localhost:1736") @@ -27,7 +27,7 @@ def test_map_props(): check_test_prop(client) work_dir = tempfile.mkdtemp() - server = server = GraphServer(work_dir) + server = GraphServer(work_dir) with server.start(): client.new_graph("test", "EVENT") rg = client.remote_graph("test") diff --git a/python/tests/test_base_install/test_graphql/test_apply_views.py b/python/tests/test_base_install/test_graphql/test_apply_views.py index f417f6b2a6..298b98c73a 100644 --- a/python/tests/test_base_install/test_graphql/test_apply_views.py +++ b/python/tests/test_base_install/test_graphql/test_apply_views.py @@ -987,6 +987,7 @@ def test_apply_view_after(): nodes { applyViews(views: [{after: 6}]) { list { + name history { timestamps { list @@ -1042,11 +1043,11 @@ def test_apply_view_after(): "nodes": { "applyViews": { "list": [ - {"history": {"timestamps": {"list": []}}}, - {"history": {"timestamps": {"list": []}}}, - {"history": {"timestamps": {"list": []}}}, - {"history": {"timestamps": {"list": []}}}, - {"history": {"timestamps": {"list": []}}}, + {"name": "1", "history": {"timestamps": {"list": []}}}, + {"name": "2", "history": {"timestamps": {"list": []}}}, + {"name": "3", "history": {"timestamps": {"list": []}}}, + {"name": "6", "history": {"timestamps": {"list": []}}}, + {"name": "7", "history": {"timestamps": {"list": []}}}, ] } }, @@ -1080,7 +1081,7 @@ def test_apply_view_after(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_shrink_window(): @@ -1190,7 +1191,7 @@ def test_apply_view_shrink_window(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_shrink_start(): @@ -1300,7 +1301,7 @@ def test_apply_view_shrink_start(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_shrink_end(): @@ -1412,7 +1413,7 @@ def test_apply_view_shrink_end(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_layers(): @@ -1544,7 +1545,7 @@ def test_apply_view_layers(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_layer(): @@ -1671,7 +1672,7 @@ def test_apply_view_layer(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_exclude_layer(): @@ -1861,7 +1862,7 @@ def test_apply_view_exclude_layer(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_exclude_layers(): @@ -2045,7 +2046,7 @@ def test_apply_view_exclude_layers(): }, } } - run_graphql_test(query, correct, graph) + run_graphql_test(query, correct, graph, sort_output=True) def test_apply_view_type_filter(): @@ -3214,7 +3215,7 @@ def test_valid_graph(): correct = { "graph": { 
"applyViews": { - "edges": {"list": [{"id": ["6", "7"], "latestTime": {"timestamp": 5}}]} + "edges": {"list": [{"id": [6, 7], "latestTime": {"timestamp": 5}}]} } } } diff --git a/python/tests/test_base_install/test_graphql/test_edge_sorting.py b/python/tests/test_base_install/test_graphql/test_edge_sorting.py index a068a49069..c4358c92de 100644 --- a/python/tests/test_base_install/test_graphql/test_edge_sorting.py +++ b/python/tests/test_base_install/test_graphql/test_edge_sorting.py @@ -100,7 +100,7 @@ def test_graph_edge_no_sort(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -138,7 +138,7 @@ def test_graph_edge_sort_by_nothing(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -152,9 +152,6 @@ def test_graph_edge_sort_by_src(graph): src { id } - dst { - id - } } } } @@ -166,11 +163,11 @@ def test_graph_edge_sort_by_src(graph): "edges": { "sorted": { "list": [ - {"src": {"id": "a"}, "dst": {"id": "d"}}, - {"src": {"id": "a"}, "dst": {"id": "b"}}, - {"src": {"id": "b"}, "dst": {"id": "d"}}, - {"src": {"id": "b"}, "dst": {"id": "c"}}, - {"src": {"id": "c"}, "dst": {"id": "d"}}, + {"src": {"id": "a"}}, + {"src": {"id": "a"}}, + {"src": {"id": "b"}}, + {"src": {"id": "b"}}, + {"src": {"id": "c"}}, ] } } @@ -187,9 +184,6 @@ def test_graph_edge_sort_by_dst(graph): edges { sorted(sortBys: [{ dst: true }]) { list { - src { - id - } dst { id } @@ -204,11 +198,11 @@ def test_graph_edge_sort_by_dst(graph): "edges": { "sorted": { "list": [ - {"src": {"id": "a"}, "dst": {"id": "b"}}, - {"src": {"id": "b"}, "dst": {"id": "c"}}, - {"src": {"id": "a"}, "dst": {"id": "d"}}, - {"src": {"id": "b"}, "dst": {"id": "d"}}, - {"src": {"id": "c"}, "dst": {"id": "d"}}, + {"dst": {"id": "b"}}, + {"dst": {"id": "c"}}, + {"dst": {"id": "d"}}, + {"dst": {"id": "d"}}, + {"dst": {"id": "d"}}, ] } } @@ -450,33 +444,32 @@ def test_graph_edge_sort_by_eprop2(graph): @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) def test_graph_edge_sort_by_eprop3(graph): query = """ - query { - graph(path: "g") { - edges { - sorted(sortBys: [{ property: "eprop3" }]) { - list { - src { - id - } - dst { - id + query { + graph(path: "g") { + edges { + sorted(sortBys: [{ property: "eprop3" }]) { + list { + properties { + get(key: "eprop3") { + value + } + } + } } } } } - } - } """ expected_output = { "graph": { "edges": { "sorted": { "list": [ - {"src": {"id": "b"}, "dst": {"id": "c"}}, - {"src": {"id": "a"}, "dst": {"id": "d"}}, - {"src": {"id": "b"}, "dst": {"id": "d"}}, - {"src": {"id": "c"}, "dst": {"id": "d"}}, - {"src": {"id": "a"}, "dst": {"id": "b"}}, + {"properties": {"get": {"value": "ayz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + {"properties": {"get": {"value": "xyz1234"}}}, ] } } @@ -493,11 +486,10 @@ def test_graph_edge_sort_by_eprop4(graph): edges { sorted(sortBys: [{ property: "eprop4" }]) { list { - src { - id - } - dst { - id + properties { + get(key: "eprop4") { + value + } } } } @@ -510,11 +502,11 @@ def test_graph_edge_sort_by_eprop4(graph): "edges": { "sorted": { "list": [ - {"src": {"id": "b"}, "dst": {"id": "c"}}, - {"src": {"id": "c"}, "dst": {"id": "d"}}, - {"src": {"id": "a"}, "dst": 
{"id": "b"}}, - {"src": {"id": "a"}, "dst": {"id": "d"}}, - {"src": {"id": "b"}, "dst": {"id": "d"}}, + {"properties": {"get": None}}, + {"properties": {"get": {"value": False}}}, + {"properties": {"get": {"value": False}}}, + {"properties": {"get": {"value": True}}}, + {"properties": {"get": {"value": True}}}, ] } } diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_edge_filter_gql.py b/python/tests/test_base_install/test_graphql/test_filters/test_edge_filter_gql.py index ecf47baf31..0f0cdca85b 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_edge_filter_gql.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_edge_filter_gql.py @@ -42,7 +42,7 @@ def test_filter_edges_with_str_ids_for_node_id_eq_gql(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) EVENT_GRAPH = init_graph2(Graph()) @@ -153,7 +153,7 @@ def test_edges_filter_window_is_active(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_graph_edges_property_filter.py b/python/tests/test_base_install/test_graphql/test_filters/test_graph_edges_property_filter.py index dffbf30320..f3279b530d 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_graph_edges_property_filter.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_graph_edges_property_filter.py @@ -198,7 +198,7 @@ def test_graph_edge_property_filter_less_than_or_equal(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -347,7 +347,7 @@ def test_graph_edge_property_filter_is_some(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -375,7 +375,7 @@ def test_graph_edge_property_filter_is_in(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -463,7 +463,7 @@ def test_graph_edge_property_filter_is_not_in_empty_list(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -518,7 +518,7 @@ def test_graph_edge_not_property_filter(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -550,7 +550,7 @@ def test_edges_property_filter_starts_with(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -582,7 +582,7 @@ def test_edges_property_filter_ends_with(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) EVENT_GRAPH = init_graph2(Graph()) @@ -612,7 +612,7 @@ def test_edges_selection(graph): } } } - run_graphql_test(query, 
expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) # The inner edges filter has no effect on the list of edges returned from selection filter @@ -645,7 +645,7 @@ def test_edges_selection_edges_filter_paired(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -1070,4 +1070,4 @@ def test_edges_graph_filter_gql(graph): } } } - run_graphql_test(query, expected, graph) + run_graphql_test(query, expected, graph, sort_output=True) diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_graph_nodes_property_filter.py b/python/tests/test_base_install/test_graphql/test_filters/test_graph_nodes_property_filter.py index a8f088b278..4160af99f5 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_graph_nodes_property_filter.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_graph_nodes_property_filter.py @@ -74,7 +74,7 @@ def test_graph_node_property_filter_not_equal(graph): expected_output = { "graph": {"filterNodes": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -169,7 +169,7 @@ def test_graph_node_property_filter_less_than_or_equal(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -243,7 +243,7 @@ def test_graph_node_property_filter_less_than(graph): expected_output = { "graph": {"filterNodes": {"nodes": {"list": [{"name": "b"}, {"name": "c"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -281,7 +281,7 @@ def test_graph_node_property_filter_is_none(graph): expected_output = { "graph": {"filterNodes": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -319,7 +319,7 @@ def test_graph_node_property_filter_is_in(graph): expected_output = { "graph": {"filterNodes": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -421,7 +421,7 @@ def test_node_property_filter_not_is_not_in_empty_list(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -472,7 +472,7 @@ def test_graph_node_not_property_filter(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -514,7 +514,7 @@ def test_graph_node_type_and_property_filter(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) 
@@ -542,7 +542,7 @@ def test_graph_nodes_property_filter_starts_with(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_neighbours_filter.py b/python/tests/test_base_install/test_graphql/test_filters/test_neighbours_filter.py index 7079132e49..aa0804d114 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_neighbours_filter.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_neighbours_filter.py @@ -218,7 +218,7 @@ def test_neighbours_found(graph): "node": {"filter": {"neighbours": {"list": [{"name": "b"}, {"name": "c"}]}}} } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -321,11 +321,13 @@ def test_neighbours_neighbours_filtering(graph): graph(path: "g") { nodes(select: { property: { name: "p100", where: { gt: { i64: 30 } } } }) { list { + name neighbours { filter(expr: { property: { name: "p2", where: { gt: { i64: 3 } } } }) { list { + name neighbours { list { name @@ -344,28 +346,42 @@ def test_neighbours_neighbours_filtering(graph): "nodes": { "list": [ { + "name": "1", "neighbours": { "filter": { "list": [ - {"neighbours": {"list": [{"name": "3"}]}}, - {"neighbours": {"list": []}}, + { + "name": "2", + "neighbours": {"list": [{"name": "3"}]}, + }, + {"name": "3", "neighbours": {"list": []}}, ] } - } + }, }, { + "name": "3", "neighbours": { "filter": { "list": [ - {"neighbours": {"list": [{"name": "3"}]}}, - {"neighbours": {"list": [{"name": "3"}]}}, - {"neighbours": {"list": [{"name": "3"}]}}, + { + "name": "1", + "neighbours": {"list": [{"name": "3"}]}, + }, + { + "name": "2", + "neighbours": {"list": [{"name": "3"}]}, + }, + { + "name": "4", + "neighbours": {"list": [{"name": "3"}]}, + }, ] } - } + }, }, ] } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_node_filter_gql.py b/python/tests/test_base_install/test_graphql/test_filters/test_node_filter_gql.py index 3678f8dbfd..f9cd815905 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_node_filter_gql.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_node_filter_gql.py @@ -140,7 +140,7 @@ def test_nodes_filter_windowed_is_active(graph): } } } - run_graphql_test(query, expected, graph) + run_graphql_test(query, expected, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) diff --git a/python/tests/test_base_install/test_graphql/test_filters/test_nodes_property_filter.py b/python/tests/test_base_install/test_graphql/test_filters/test_nodes_property_filter.py index a26d27f4cf..72526d6ee2 100644 --- a/python/tests/test_base_install/test_graphql/test_filters/test_nodes_property_filter.py +++ b/python/tests/test_base_install/test_graphql/test_filters/test_nodes_property_filter.py @@ -34,12 +34,13 @@ def test_node_property_filter_equal2(graph): } ) { list { - neighbours { - list { - name - } + name + neighbours { + list { + name } - } + } + } } } } @@ -50,16 +51,16 @@ def test_node_property_filter_equal2(graph): "nodes": { "filter": { "list": [ - {"neighbours": {"list": []}}, - {"neighbours": {"list": []}}, - 
{"neighbours": {"list": []}}, - {"neighbours": {"list": [{"name": "a"}]}}, + {"name": "a", "neighbours": {"list": []}}, + {"name": "b", "neighbours": {"list": []}}, + {"name": "c", "neighbours": {"list": []}}, + {"name": "d", "neighbours": {"list": [{"name": "a"}]}}, ] } } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -145,7 +146,7 @@ def test_node_property_filter_not_equal(graph): expected_output = { "graph": {"nodes": {"select": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -265,7 +266,7 @@ def test_node_property_filter_less_than_or_equal(graph): "nodes": {"select": {"list": [{"name": "b"}, {"name": "c"}, {"name": "d"}]}} } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -367,7 +368,7 @@ def test_node_property_filter_less_than(graph): expected_output = { "graph": {"nodes": {"select": {"list": [{"name": "b"}, {"name": "c"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -419,7 +420,7 @@ def test_node_property_filter_is_none(graph): expected_output = { "graph": {"nodes": {"select": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -445,7 +446,7 @@ def test_node_property_filter_is_some(graph): expected_output = { "graph": {"nodes": {"select": {"list": [{"name": "a"}, {"name": "c"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -471,7 +472,7 @@ def test_node_property_filter_is_in(graph): expected_output = { "graph": {"nodes": {"select": {"list": [{"name": "b"}, {"name": "d"}]}}} } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -604,7 +605,7 @@ def test_node_property_filter_is_not_in_empty_list(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -682,7 +683,7 @@ def test_nodes_property_filter_starts_with(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -734,7 +735,7 @@ def test_nodes_property_filter_temporal_first_starts_with(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -823,7 +824,7 @@ def test_nodes_temporal_property_filter_agg(graph): expected_output = { "graph": {"filterNodes": {"nodes": {"list": [{"name": "2"}, {"name": "3"}]}}} } - run_graphql_test(query, 
expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) EVENT_GRAPH = create_test_graph(Graph()) @@ -864,6 +865,7 @@ def test_nodes_neighbours_selection_with_prop_filter(graph): graph(path: "g") { nodes(select: { property: { name: "p100", where: { gt: { i64: 30 } } } }) { list { + name neighbours { list { name @@ -878,17 +880,21 @@ def test_nodes_neighbours_selection_with_prop_filter(graph): "graph": { "nodes": { "list": [ - {"neighbours": {"list": [{"name": "2"}, {"name": "3"}]}}, { + "name": "1", + "neighbours": {"list": [{"name": "2"}, {"name": "3"}]}, + }, + { + "name": "3", "neighbours": { "list": [{"name": "1"}, {"name": "2"}, {"name": "4"}] - } + }, }, ] } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) diff --git a/python/tests/test_base_install/test_graphql/test_gql_graph_surface.py b/python/tests/test_base_install/test_graphql/test_gql_graph_surface.py new file mode 100644 index 0000000000..3b9815cbc2 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_graph_surface.py @@ -0,0 +1,370 @@ +"""Tests for `Graph`-level fields that previously had no GraphQL coverage: + +- `countEdges`, `countNodes`, `countTemporalEdges` +- `hasNode`, `hasEdge` (+ optional `layer:` arg on `hasEdge`) +- `earliestEdgeTime`, `latestEdgeTime` +- `uniqueLayers` +- `sharedNeighbours` + +Each field is tested under a combination of base / window / layer views to +exercise the composition plumbing, not just the field itself. + +`searchEdges` is left untested here — it's marked experimental in the schema +and requires an index-creation path that these fixtures don't exercise. +""" + +from utils import run_group_graphql_test +from raphtory import Graph + + +def create_graph() -> Graph: + graph = Graph() + + # Nodes with a node_type so `typeFilter` paths could be reused + graph.add_node(10, "A", node_type="person") + graph.add_node(10, "B", node_type="person") + graph.add_node(15, "C", node_type="org") + graph.add_node(40, "D", node_type="org") + + # Edges across two layers, including a self-loop and a reverse edge + graph.add_edge(10, "A", "B", properties={"weight": 1.0}, layer="layer1") + graph.add_edge(20, "A", "B", properties={"weight": 2.0}, layer="layer1") + graph.add_edge(30, "A", "B", properties={"weight": 3.0}, layer="layer2") + graph.add_edge(15, "A", "C", layer="layer1") + graph.add_edge(25, "A", "C", layer="layer2") + graph.add_edge(40, "C", "D", layer="layer1") + graph.add_edge(50, "B", "A", layer="layer2") + graph.add_edge(25, "A", "A", layer="layer1") # self-loop + + return graph + + +def test_graph_counts(): + """`countNodes`, `countEdges`, `countTemporalEdges` under base / window / + layer / layer+window views.""" + graph = create_graph() + queries_and_expected = [] + + # base: 4 nodes, 5 unique edges (AB, AC, CD, BA, AA), 8 temporal edge events + query = """ + { + graph(path: "g") { + countNodes + countEdges + countTemporalEdges + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "countNodes": 4, + "countEdges": 5, + "countTemporalEdges": 8, + } + }, + ) + ) + + # windowed [10, 30): drops t=30, 40, 50, keeps t=10..25. + # nodes visible: A, B, C (D not yet). edges: AB@layer1, AC@layer1, AC@layer2, AA@layer1 => 3 unique edges (AB, AC, AA). 
+ # temporal edges in window: AB@10, AB@20, AC@15, AC@25, AA@25 => 5 + query = """ + { + graph(path: "g") { + window(start: 10, end: 30) { + countNodes + countEdges + countTemporalEdges + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "window": { + "countNodes": 3, + "countEdges": 3, + "countTemporalEdges": 5, + } + } + }, + ) + ) + + # layer(layer1) only: edges AB (2 updates), AC, CD, AA => 4 unique, 5 temporal + query = """ + { + graph(path: "g") { + layer(name: "layer1") { + countNodes + countEdges + countTemporalEdges + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "countNodes": 4, + "countEdges": 4, + "countTemporalEdges": 5, + } + } + }, + ) + ) + + # layer(layer1) + window [10, 30): AB@10, AB@20, AC@15, AA@25 => 3 unique, 4 temporal + query = """ + { + graph(path: "g") { + layer(name: "layer1") { + window(start: 10, end: 30) { + countNodes + countEdges + countTemporalEdges + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "window": { + "countNodes": 3, + "countEdges": 3, + "countTemporalEdges": 4, + } + } + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph) + + +def test_has_node_and_has_edge(): + """`hasNode` / `hasEdge` under base / window / layer views. + + `hasEdge` also accepts an optional `layer:` arg. + """ + graph = create_graph() + queries_and_expected = [] + + # base: all exist + query = """ + { + graph(path: "g") { + hasA: hasNode(name: "A") + hasZ: hasNode(name: "Z") + edgeAB: hasEdge(src: "A", dst: "B") + edgeBA: hasEdge(src: "B", dst: "A") + edgeAD: hasEdge(src: "A", dst: "D") + edgeAB_layer1: hasEdge(src: "A", dst: "B", layer: "layer1") + edgeAB_layer2: hasEdge(src: "A", dst: "B", layer: "layer2") + edgeBA_layer1: hasEdge(src: "B", dst: "A", layer: "layer1") + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "hasA": True, + "hasZ": False, + "edgeAB": True, + "edgeBA": True, + "edgeAD": False, + "edgeAB_layer1": True, + "edgeAB_layer2": True, + "edgeBA_layer1": False, # B->A only exists on layer2 + } + }, + ) + ) + + # windowed [10, 30): D not yet present, edge CD not yet either + query = """ + { + graph(path: "g") { + window(start: 10, end: 30) { + hasD: hasNode(name: "D") + edgeCD: hasEdge(src: "C", dst: "D") + edgeAB: hasEdge(src: "A", dst: "B") + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "window": { + "hasD": False, + "edgeCD": False, + "edgeAB": True, + } + } + }, + ) + ) + + # layer(layer2): edge AB exists, BA exists on layer2, CD doesn't (layer1 only) + query = """ + { + graph(path: "g") { + layer(name: "layer2") { + edgeAB: hasEdge(src: "A", dst: "B") + edgeBA: hasEdge(src: "B", dst: "A") + edgeCD: hasEdge(src: "C", dst: "D") + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "edgeAB": True, + "edgeBA": True, + "edgeCD": False, + } + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph) + + +def test_edge_time_bounds_and_unique_layers(): + """`earliestEdgeTime`, `latestEdgeTime`, `uniqueLayers` base + window + layer.""" + graph = create_graph() + queries_and_expected = [] + + # base: edges from t=10 to t=50; layers layer1 + layer2 + query = """ + { + graph(path: "g") { + earliestEdgeTime { timestamp } + latestEdgeTime { timestamp } + uniqueLayers + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "earliestEdgeTime": {"timestamp": 10}, + "latestEdgeTime": {"timestamp": 
50},
+                    "uniqueLayers": ["layer1", "layer2"],
+                }
+            },
+        )
+    )
+
+    # windowed [15, 30): edge events at t=15..25 only (next event is at t=30)
+    query = """
+    {
+        graph(path: "g") {
+            window(start: 15, end: 30) {
+                earliestEdgeTime { timestamp }
+                latestEdgeTime { timestamp }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "window": {
+                        "earliestEdgeTime": {"timestamp": 15},
+                        "latestEdgeTime": {"timestamp": 25},
+                    }
+                }
+            },
+        )
+    )
+
+    # layer(layer2): edge events at t=25, 30, 50
+    query = """
+    {
+        graph(path: "g") {
+            layer(name: "layer2") {
+                earliestEdgeTime { timestamp }
+                latestEdgeTime { timestamp }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "layer": {
+                        "earliestEdgeTime": {"timestamp": 25},
+                        "latestEdgeTime": {"timestamp": 50},
+                    }
+                }
+            },
+        )
+    )
+
+    run_group_graphql_test(queries_and_expected, graph)
+
+
+def test_shared_neighbours():
+    """`sharedNeighbours` returns the intersection of neighbour sets."""
+    graph = create_graph()
+    queries_and_expected = []
+
+    # A's neighbours (undirected): {A (self-loop), B, C}
+    # B's neighbours: {A} -> shared(A,B) = {A}
+    # C's neighbours: {A, D} -> shared(A,C) = {A}, shared(B,C) = {A}
+    query = """
+    {
+        graph(path: "g") {
+            AB: sharedNeighbours(selectedNodes: ["A", "B"]) { name }
+            AC: sharedNeighbours(selectedNodes: ["A", "C"]) { name }
+            BC: sharedNeighbours(selectedNodes: ["B", "C"]) { name }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "AB": [{"name": "A"}],
+                    "AC": [{"name": "A"}],
+                    "BC": [{"name": "A"}],
+                }
+            },
+        )
+    )
+
+    run_group_graphql_test(queries_and_expected, graph, sort_output=True)
diff --git a/python/tests/test_base_install/test_graphql/test_gql_graph_type.py b/python/tests/test_base_install/test_graphql/test_gql_graph_type.py
new file mode 100644
index 0000000000..a9f9463b60
--- /dev/null
+++ b/python/tests/test_base_install/test_graphql/test_gql_graph_type.py
@@ -0,0 +1,273 @@
+"""`graph(path:, graphType:)` accepts an optional `graphType` argument that
+re-interprets the stored graph at query time.
+
+- `EVENT` semantics: each update is a point-in-time event; windows only see
+  updates whose timestamps fall inside them.
+- `PERSISTENT` semantics: values carry forward until overwritten or deleted;
+  an edge added at t=1 is visible in a window starting at t=5.
+
+These tests confirm the override works in both directions, and that omitting
+the argument preserves the graph's native type.
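+
+A minimal sketch of the override, in the same shape the tests below use:
+
+    {
+      graph(path: "g", graphType: PERSISTENT) {
+        window(start: 5, end: 10) { hasEdge(src: "a", dst: "b") }
+      }
+    }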
+""" + +import json +import tempfile + +from raphtory import Graph, PersistentGraph +from raphtory.graphql import GraphServer + +from utils import PORT + + +def _query(server, q: str) -> dict: + response = server.get_client().query(q) + return json.loads(response) if isinstance(response, str) else response + + +def test_event_graph_default_uses_event_semantics(): + """Without `graphType`, an event-stored graph keeps event semantics — a + window after the addition event sees no edge.""" + work_dir = tempfile.mkdtemp() + g = Graph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + graph(path: "g") { + window(start: 5, end: 10) { + hasEdge(src: "a", dst: "b") + } + } + } + """, + ) + assert result["graph"]["window"]["hasEdge"] is False + + +def test_event_graph_read_as_persistent_carries_value_forward(): + """Reading an event graph through `graphType: PERSISTENT` makes the edge + visible in a window that starts after the addition event.""" + work_dir = tempfile.mkdtemp() + g = Graph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + graph(path: "g", graphType: PERSISTENT) { + window(start: 5, end: 10) { + hasEdge(src: "a", dst: "b") + } + } + } + """, + ) + assert result["graph"]["window"]["hasEdge"] is True + + +def test_persistent_graph_default_carries_value_forward(): + """Without `graphType`, a persistent-stored graph keeps persistent + semantics — the edge is alive in a window after the add.""" + work_dir = tempfile.mkdtemp() + g = PersistentGraph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + graph(path: "g") { + window(start: 5, end: 10) { + hasEdge(src: "a", dst: "b") + } + } + } + """, + ) + assert result["graph"]["window"]["hasEdge"] is True + + +def test_persistent_graph_read_as_event_drops_carried_values(): + """Reading a persistent graph through `graphType: EVENT` makes the edge + invisible in a window that starts after the addition event.""" + work_dir = tempfile.mkdtemp() + g = PersistentGraph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + graph(path: "g", graphType: EVENT) { + window(start: 5, end: 10) { + hasEdge(src: "a", dst: "b") + } + } + } + """, + ) + assert result["graph"]["window"]["hasEdge"] is False + + +def test_mutable_event_graph_default_uses_event_semantics(): + """`updateGraph(path).graph` without `graphType` keeps event semantics.""" + work_dir = tempfile.mkdtemp() + g = Graph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + updateGraph(path: "g") { + graph { + window(start: 5, end: 10) { hasEdge(src: "a", dst: "b") } + } + } + } + """, + ) + assert result["updateGraph"]["graph"]["window"]["hasEdge"] is False + + +def test_mutable_event_graph_read_as_persistent_carries_value_forward(): + """`updateGraph(path).graph(graphType: PERSISTENT)` re-interprets an + event-stored graph through persistent semantics.""" + work_dir = tempfile.mkdtemp() + g = Graph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + 
server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + updateGraph(path: "g") { + graph(graphType: PERSISTENT) { + window(start: 5, end: 10) { hasEdge(src: "a", dst: "b") } + } + } + } + """, + ) + assert result["updateGraph"]["graph"]["window"]["hasEdge"] is True + + +def test_mutable_persistent_graph_default_carries_value_forward(): + """`updateGraph(path).graph` on a persistent graph keeps persistent + semantics by default.""" + work_dir = tempfile.mkdtemp() + g = PersistentGraph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + updateGraph(path: "g") { + graph { + window(start: 5, end: 10) { hasEdge(src: "a", dst: "b") } + } + } + } + """, + ) + assert result["updateGraph"]["graph"]["window"]["hasEdge"] is True + + +def test_mutable_persistent_graph_read_as_event_drops_carried_values(): + """`updateGraph(path).graph(graphType: EVENT)` re-interprets a + persistent graph through event semantics.""" + work_dir = tempfile.mkdtemp() + g = PersistentGraph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + updateGraph(path: "g") { + graph(graphType: EVENT) { + window(start: 5, end: 10) { hasEdge(src: "a", dst: "b") } + } + } + } + """, + ) + assert result["updateGraph"]["graph"]["window"]["hasEdge"] is False + + +def test_mutable_graph_reads_pending_mutations_through_override(): + """Mutate via `updateGraph(path)`, then read back via the graph accessor + with a `graphType` override — both the existing data and the new edge + should be visible under the chosen semantics.""" + work_dir = tempfile.mkdtemp() + g = Graph() + g.add_edge(1, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + updateGraph(path: "g") { + addEdge(time: 2, src: "b", dst: "c") { success } + asPersistent: graph(graphType: PERSISTENT) { + window(start: 5, end: 10) { + abEdge: hasEdge(src: "a", dst: "b") + bcEdge: hasEdge(src: "b", dst: "c") + } + } + } + } + """, + ) + win = result["updateGraph"]["asPersistent"]["window"] + assert win["abEdge"] is True + assert win["bcEdge"] is True + + +def test_persistent_deletes_visible_via_persistent_view(): + """A delete event in a persistent graph propagates: a window after the + deletion shows the edge as gone under persistent semantics.""" + work_dir = tempfile.mkdtemp() + g = PersistentGraph() + g.add_edge(1, "a", "b") + g.delete_edge(5, "a", "b") + with GraphServer(work_dir).start(PORT) as server: + server.get_client().send_graph(path="g", graph=g) + + result = _query( + server, + """ + { + before: graph(path: "g") { + window(start: 2, end: 4) { hasEdge(src: "a", dst: "b") } + } + after: graph(path: "g") { + window(start: 6, end: 10) { hasEdge(src: "a", dst: "b") } + } + } + """, + ) + assert result["before"]["window"]["hasEdge"] is True + assert result["after"]["window"]["hasEdge"] is False diff --git a/python/tests/test_base_install/test_graphql/test_gql_misc_surface.py b/python/tests/test_base_install/test_graphql/test_gql_misc_surface.py new file mode 100644 index 0000000000..8e31ccc630 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_misc_surface.py @@ -0,0 +1,361 @@ +"""Tests for smaller GraphQL surface fields that previously had no coverage: + +- `Nodes.ids` +- 
`Edges.explode`, `Edges.explodeLayers` +- `PathFromNode.ids` +- `History.isEmpty` +- `TemporalProperty.orderedDedupe` +- `MetaGraph.nodeCount`, `MetaGraph.edgeCount`, `QueryRoot.graphMetadata` +""" + +import json +import tempfile + +from utils import PORT, run_group_graphql_test +from raphtory import Graph +from raphtory.graphql import GraphServer + + +def create_graph() -> Graph: + graph = Graph() + graph.add_node(10, "A", node_type="person") + graph.add_node(10, "B", node_type="person") + graph.add_node(15, "C", node_type="org") + graph.add_node(40, "D", node_type="org") + + graph.add_edge(10, "A", "B", properties={"weight": 1.0}, layer="layer1") + graph.add_edge(20, "A", "B", properties={"weight": 2.0}, layer="layer1") + graph.add_edge(30, "A", "B", properties={"weight": 3.0}, layer="layer2") + graph.add_edge(15, "A", "C", layer="layer1") + graph.add_edge(25, "A", "C", layer="layer2") + graph.add_edge(40, "C", "D", layer="layer1") + graph.add_edge(50, "B", "A", layer="layer2") + graph.add_edge(25, "A", "A", layer="layer1") + + return graph + + +def test_nodes_ids(): + """`nodes.ids` on base / window / layer views. Order isn't guaranteed + by the resolver, so each result is compared as a set.""" + graph = create_graph() + + cases = [ + # base: all 4 nodes present. + ( + """{ graph(path: "g") { nodes { ids } } }""", + ("graph", "nodes"), + {"A", "B", "C", "D"}, + ), + # window [10, 25): D (added at 40) excluded. + ( + """{ graph(path: "g") { window(start: 10, end: 25) { nodes { ids } } } }""", + ("graph", "window", "nodes"), + {"A", "B", "C"}, + ), + # layer(layer2): D only has a layer1 edge (C->D) but is still present + # because base-layer (non-layered) node events are always included in + # any layer view — D was added via `add_node(40, "D", ...)` with no + # layer arg. + ( + """{ graph(path: "g") { layer(name: "layer2") { nodes { ids } } } }""", + ("graph", "layer", "nodes"), + {"A", "B", "C", "D"}, + ), + ] + + tmp = tempfile.mkdtemp() + with GraphServer(tmp, create_index=True).start(PORT) as server: + client = server.get_client() + client.send_graph(path="g", graph=graph) + for query, path, expected in cases: + response = client.query(query) + data = json.loads(response) if isinstance(response, str) else response + for key in path: + data = data[key] + assert set(data["ids"]) == expected, f"query: {query}" + + +def test_edges_explode_and_explode_layers(): + """`edges.explode` / `explodeLayers` — collection-level explosion.""" + graph = create_graph() + queries_and_expected = [] + + # Restrict to layer(layer1) + window [10, 25) so the output stays small: + # edges in scope: A->B@10, A->B@20, A->C@15. 3 explode events. + # explodeLayers: one per (edge, layer) pair => 2 entries (A->B on layer1, A->C on layer1). 
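+    # For reference, a rough Python-side sketch of the same distinction (an
+    # assumption that the Python view/iteration API mirrors these GraphQL
+    # fields, not a statement of it):
+    #   view = graph.layer("layer1").window(10, 25)
+    #   len(list(view.edges.explode()))         # 3: one entry per temporal update
+    #   len(list(view.edges.explode_layers()))  # 2: one entry per (edge, layer) pair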
+ query = """ + { + graph(path: "g") { + layer(name: "layer1") { + window(start: 10, end: 25) { + edges { + explode { list { src { name } dst { name } time { timestamp } layerName } } + explodeLayers { list { src { name } dst { name } layerName } } + } + } + } + } + } + """ + expected = { + "graph": { + "layer": { + "window": { + "edges": { + "explode": { + "list": [ + { + "src": {"name": "A"}, + "dst": {"name": "B"}, + "time": {"timestamp": 10}, + "layerName": "layer1", + }, + { + "src": {"name": "A"}, + "dst": {"name": "B"}, + "time": {"timestamp": 20}, + "layerName": "layer1", + }, + { + "src": {"name": "A"}, + "dst": {"name": "C"}, + "time": {"timestamp": 15}, + "layerName": "layer1", + }, + ] + }, + "explodeLayers": { + "list": [ + { + "src": {"name": "A"}, + "dst": {"name": "B"}, + "layerName": "layer1", + }, + { + "src": {"name": "A"}, + "dst": {"name": "C"}, + "layerName": "layer1", + }, + ] + }, + } + } + } + } + } + queries_and_expected.append((query, expected)) + + run_group_graphql_test(queries_and_expected, graph, sort_output=True) + + +def test_path_from_node_ids(): + """`pathFromNode.ids` via `neighbours` / `inNeighbours` / `outNeighbours`.""" + graph = create_graph() + queries_and_expected = [] + + # A's neighbours (undirected): B, C, A (self-loop). + # A's outNeighbours: B, C, A + # A's inNeighbours: A, B + query = """ + { + graph(path: "g") { + node(name: "A") { + neighbours { ids } + outNeighbours { ids } + inNeighbours { ids } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "node": { + "neighbours": {"ids": ["A", "B", "C"]}, + "outNeighbours": {"ids": ["A", "B", "C"]}, + "inNeighbours": {"ids": ["A", "B"]}, + } + } + }, + ) + ) + + # layer(layer2) changes A's neighbourhood: A->B, A->C, B->A are the only + # layer2 edges touching A => neighbours={B, C}, outNeighbours={B, C}, inNeighbours={B}. + query = """ + { + graph(path: "g") { + layer(name: "layer2") { + node(name: "A") { + neighbours { ids } + outNeighbours { ids } + inNeighbours { ids } + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "node": { + "neighbours": {"ids": ["B", "C"]}, + "outNeighbours": {"ids": ["B", "C"]}, + "inNeighbours": {"ids": ["B"]}, + } + } + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph, sort_output=True) + + +def test_history_is_empty(): + """`history.isEmpty` is true on an empty window, false otherwise.""" + graph = create_graph() + queries_and_expected = [] + + # A has history (created at t=10) + query = """{ graph(path: "g") { node(name: "A") { history { isEmpty } } } }""" + queries_and_expected.append( + (query, {"graph": {"node": {"history": {"isEmpty": False}}}}) + ) + + # Windowing the node (not the graph) keeps the node reachable but empties + # its history => isEmpty = True. + query = """ + { + graph(path: "g") { + node(name: "A") { + window(start: 0, end: 5) { history { isEmpty } } + } + } + } + """ + queries_and_expected.append( + ( + query, + {"graph": {"node": {"window": {"history": {"isEmpty": True}}}}}, + ) + ) + + # Same trick for an edge: pick a window with no A->B updates (the only + # A->B updates are at t=10, 20, 30). 
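+    # Python-side analogue (a sketch, assuming the usual raphtory view API):
+    #   len(graph.edge("A", "B").window(40, 45).history()) == 0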
+ query = """ + { + graph(path: "g") { + edge(src: "A", dst: "B") { + window(start: 40, end: 45) { history { isEmpty } } + } + } + } + """ + queries_and_expected.append( + ( + query, + {"graph": {"edge": {"window": {"history": {"isEmpty": True}}}}}, + ) + ) + + # Edge A->B's full history is non-empty + query = """ + { + graph(path: "g") { + edge(src: "A", dst: "B") { history { isEmpty } } + } + } + """ + queries_and_expected.append( + (query, {"graph": {"edge": {"history": {"isEmpty": False}}}}) + ) + + run_group_graphql_test(queries_and_expected, graph) + + +def test_temporal_property_ordered_dedupe(): + """`TemporalProperty.orderedDedupe` — collapses consecutive-equal updates.""" + g = Graph() + # state timeline: a(1), a(2), b(3), a(4), a(5) + # latestTime=True => keeps the latest timestamp of each run: (2,'a'), (3,'b'), (5,'a') + # latestTime=False => keeps the first timestamp of each run: (1,'a'), (3,'b'), (4,'a') + g.add_node(1, "X", properties={"state": "a"}) + g.add_node(2, "X", properties={"state": "a"}) + g.add_node(3, "X", properties={"state": "b"}) + g.add_node(4, "X", properties={"state": "a"}) + g.add_node(5, "X", properties={"state": "a"}) + + queries_and_expected = [] + + query = """ + { + graph(path: "g") { + node(name: "X") { + properties { + temporal { + get(key: "state") { + latest: orderedDedupe(latestTime: true) { time { timestamp } value } + first: orderedDedupe(latestTime: false) { time { timestamp } value } + } + } + } + } + } + } + """ + expected = { + "graph": { + "node": { + "properties": { + "temporal": { + "get": { + "latest": [ + {"time": {"timestamp": 2}, "value": "a"}, + {"time": {"timestamp": 3}, "value": "b"}, + {"time": {"timestamp": 5}, "value": "a"}, + ], + "first": [ + {"time": {"timestamp": 1}, "value": "a"}, + {"time": {"timestamp": 3}, "value": "b"}, + {"time": {"timestamp": 4}, "value": "a"}, + ], + } + } + } + } + } + } + queries_and_expected.append((query, expected)) + + run_group_graphql_test(queries_and_expected, g) + + +def test_meta_graph_counts(): + """`graphMetadata` → `MetaGraph.nodeCount` / `edgeCount` report persisted + counts for a stored graph without loading it.""" + graph = create_graph() + queries_and_expected = [] + + query = """{ graphMetadata(path: "g") { nodeCount edgeCount name path } }""" + queries_and_expected.append( + ( + query, + { + "graphMetadata": { + "nodeCount": 4, + "edgeCount": 5, + "name": "g", + "path": "g", + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph) diff --git a/python/tests/test_base_install/test_graphql/test_gql_mutation_time_input.py b/python/tests/test_base_install/test_graphql/test_gql_mutation_time_input.py new file mode 100644 index 0000000000..c3af13c853 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_mutation_time_input.py @@ -0,0 +1,303 @@ +"""Mutation arguments that take a time now accept the full `TimeInput` shape: + +- An `Int` (epoch milliseconds). +- An RFC3339 / ISO-8601 datetime string. +- An `{timestamp, eventId}` object. + +These tests verify each form on every mutation surface that takes a time, and +confirm the resulting graph state is identical regardless of which input form +was used. 
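+
+For example, all three of the following address the same point on the
+timeline, 300ms after the epoch (a sketch of the shapes exercised below):
+
+    addNode(time: 300, name: "n")                           # plain Int (epoch ms)
+    addNode(time: "1970-01-01T00:00:00.300Z", name: "n")    # RFC3339 string
+    addNode(time: {timestamp: 300, eventId: 0}, name: "n")  # object form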
+""" + +import json +import tempfile + +from raphtory import Graph +from raphtory.graphql import GraphServer + +from utils import PORT + + +def _query(server, q: str) -> dict: + response = server.get_client().query(q) + return json.loads(response) if isinstance(response, str) else response + + +def test_add_node_accepts_int_string_and_object_time(): + """`addNode` accepts every `TimeInput` shape — Int, RFC3339 string, and + `{timestamp, eventId}` object. Each insertion lands at its expected + timestamp and is queryable afterwards.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + # Three forms, three different nodes. + client.query(""" + { + updateGraph(path: "g") { + int_form: addNode(time: 100, name: "intNode") { success } + str_form: addNode(time: "1970-01-01T00:00:00.200Z", name: "strNode") { success } + obj_form: addNode(time: {timestamp: 300, eventId: 0}, name: "objNode") { success } + } + } + """) + + # Verify each landed at the expected timestamp. + result = _query( + server, + """ + { + graph(path: "g") { + intNode: node(name: "intNode") { earliestTime { timestamp } } + strNode: node(name: "strNode") { earliestTime { timestamp } } + objNode: node(name: "objNode") { earliestTime { timestamp } } + } + } + """, + ) + assert result["graph"]["intNode"]["earliestTime"]["timestamp"] == 100 + assert result["graph"]["strNode"]["earliestTime"]["timestamp"] == 200 + assert result["graph"]["objNode"]["earliestTime"]["timestamp"] == 300 + + +def test_add_edge_and_delete_edge_accept_time_input_shapes(): + """`addEdge` / `deleteEdge` accept the same forms; verify on a persistent + graph so the deletion is visible via `isValid`.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "PERSISTENT") + + client.query(""" + { + updateGraph(path: "g") { + int_add: addEdge(time: 10, src: "a", dst: "b") { success } + str_add: addEdge(time: "1970-01-01T00:00:00.020Z", src: "a", dst: "b") { success } + obj_del: deleteEdge(time: {timestamp: 30, eventId: 0}, src: "a", dst: "b") { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + edge(src: "a", dst: "b") { + earliestTime { timestamp } + latestTime { timestamp } + isValid + } + } + } + """, + ) + edge = result["graph"]["edge"] + assert edge["earliestTime"]["timestamp"] == 10 + # On a persistent graph, the deletion sets the latest valid time. + assert edge["latestTime"]["timestamp"] == 30 + # Edge was deleted at t=30 with no later re-addition, so it's invalid now. 
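+        # Python-side analogue of the same lifecycle (a sketch, assuming the
+        # usual PersistentGraph API):
+        #   g = PersistentGraph()
+        #   g.add_edge(10, "a", "b")
+        #   g.delete_edge(30, "a", "b")
+        #   g.edge("a", "b").is_valid()  # False: deleted, never re-added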
+ assert edge["isValid"] is False + + +def test_add_properties_accepts_time_input_shapes(): + """`addProperties` (graph-level temporal properties) accepts Int, string, + and object.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + p1: addProperties(t: 100, properties: [{key: "score", value: {i64: 1}}]) + p2: addProperties(t: "1970-01-01T00:00:00.200Z", properties: [{key: "score", value: {i64: 2}}]) + p3: addProperties(t: {timestamp: 300, eventId: 0}, properties: [{key: "score", value: {i64: 3}}]) + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + properties { + temporal { + get(key: "score") { + history { list { timestamp } } + values + } + } + } + } + } + """, + ) + score = result["graph"]["properties"]["temporal"]["get"] + timestamps = [h["timestamp"] for h in score["history"]["list"]] + # History is returned in temporal order; not sorting on purpose so a + # regression that returns events out of order would fail this test. + assert timestamps == [100, 200, 300] + assert score["values"] == [1, 2, 3] + + +def test_temporal_property_input_accepts_time_input_in_batch(): + """Inside `addNodes` / `addEdges`, the `time` field on each per-update + `TemporalPropertyInput` accepts every `TimeInput` shape.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + addNodes(nodes: [ + { + name: "n", + updates: [ + { time: 10, properties: [{key: "v", value: {i64: 1}}] }, + { time: "1970-01-01T00:00:00.020Z", properties: [{key: "v", value: {i64: 2}}] }, + { time: {timestamp: 30, eventId: 0}, properties: [{key: "v", value: {i64: 3}}] } + ] + } + ]) + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + node(name: "n") { + properties { temporal { get(key: "v") { + history { list { timestamp } } + values + } } } + } + } + } + """, + ) + v = result["graph"]["node"]["properties"]["temporal"]["get"] + timestamps = [h["timestamp"] for h in v["history"]["list"]] + assert timestamps == [10, 20, 30] + assert v["values"] == [1, 2, 3] + + +def test_add_edges_batch_accepts_time_input_shapes(): + """`addEdges` is the batch counterpart of `addNodes`. 
Each per-update + `time` field on its `TemporalPropertyInput` entries accepts every + `TimeInput` shape — Int, RFC3339 string, and `{timestamp, eventId}` object.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + addEdges(edges: [ + { + src: "a", dst: "b", + updates: [ + { time: 10, properties: [{key: "w", value: {i64: 1}}] }, + { time: "1970-01-01T00:00:00.020Z", properties: [{key: "w", value: {i64: 2}}] }, + { time: {timestamp: 30, eventId: 0}, properties: [{key: "w", value: {i64: 3}}] } + ] + } + ]) + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + edge(src: "a", dst: "b") { + properties { temporal { get(key: "w") { + history { list { timestamp } } + values + } } } + } + } + } + """, + ) + w = result["graph"]["edge"]["properties"]["temporal"]["get"] + timestamps = [h["timestamp"] for h in w["history"]["list"]] + assert timestamps == [10, 20, 30] + assert w["values"] == [1, 2, 3] + + +def test_mutable_node_and_edge_add_updates_accept_time_input(): + """`MutableNode.addUpdates` and `MutableEdge.addUpdates` / `delete` accept + every `TimeInput` shape via the `node()` / `edge()` lookups.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "PERSISTENT") + + # Seed the node and edge so we can look them up below. + client.query(""" + { + updateGraph(path: "g") { + addNode(time: 0, name: "n") { success } + addEdge(time: 0, src: "a", dst: "b") { success } + } + } + """) + + client.query(""" + { + updateGraph(path: "g") { + node(name: "n") { + i: addUpdates(time: 100, properties: [{key: "v", value: {i64: 1}}]) + s: addUpdates(time: "1970-01-01T00:00:00.200Z", properties: [{key: "v", value: {i64: 2}}]) + o: addUpdates(time: {timestamp: 300, eventId: 0}, properties: [{key: "v", value: {i64: 3}}]) + } + edge(src: "a", dst: "b") { + i: addUpdates(time: 10, properties: [{key: "w", value: {i64: 1}}]) + s: addUpdates(time: "1970-01-01T00:00:00.020Z", properties: [{key: "w", value: {i64: 2}}]) + d: delete(time: {timestamp: 30, eventId: 0}) + } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + node(name: "n") { + properties { temporal { get(key: "v") { values } } } + } + edge(src: "a", dst: "b") { + properties { temporal { get(key: "w") { values } } } + isValid + } + } + } + """, + ) + # Values come back in temporal order — assert the sequence as-is so + # an out-of-order regression would fail. + assert result["graph"]["node"]["properties"]["temporal"]["get"]["values"] == [ + 1, + 2, + 3, + ] + assert result["graph"]["edge"]["properties"]["temporal"]["get"]["values"] == [ + 1, + 2, + ] + # delete at t=30 with no later re-add → edge is invalid at the latest time. + assert result["graph"]["edge"]["isValid"] is False diff --git a/python/tests/test_base_install/test_graphql/test_gql_node_edge_surface.py b/python/tests/test_base_install/test_graphql/test_gql_node_edge_surface.py new file mode 100644 index 0000000000..eecf8b1590 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_node_edge_surface.py @@ -0,0 +1,588 @@ +"""Tests for `Node` and `Edge` fields that previously had no GraphQL coverage. 
+ +Node fields covered: +- `firstUpdate`, `lastUpdate` +- `edgeHistoryCount` +- `inDegree`, `outDegree` +- `inEdges`, `outEdges` + +Edge fields covered: +- `firstUpdate`, `lastUpdate` +- `layerNames` +- `layerName` (with error case) +- `explode`, `explodeLayers` +- `isValid`, `isSelfLoop` +- `nbr` on an exploded edge + +All tested under base + window + layer composition where applicable. +""" + +from utils import run_group_graphql_test, run_graphql_error_test_contains +from raphtory import Graph + + +def create_graph() -> Graph: + graph = Graph() + + graph.add_node(10, "A", node_type="person") + graph.add_node(10, "B", node_type="person") + graph.add_node(15, "C", node_type="org") + graph.add_node(40, "D", node_type="org") + + graph.add_edge(10, "A", "B", properties={"weight": 1.0}, layer="layer1") + graph.add_edge(20, "A", "B", properties={"weight": 2.0}, layer="layer1") + graph.add_edge(30, "A", "B", properties={"weight": 3.0}, layer="layer2") + graph.add_edge(15, "A", "C", layer="layer1") + graph.add_edge(25, "A", "C", layer="layer2") + graph.add_edge(40, "C", "D", layer="layer1") + graph.add_edge(50, "B", "A", layer="layer2") + graph.add_edge(25, "A", "A", layer="layer1") # self-loop + + return graph + + +def test_node_update_times_and_edge_history_count(): + """`firstUpdate`, `lastUpdate`, `edgeHistoryCount` under base / window / layer.""" + graph = create_graph() + queries_and_expected = [] + + # Base: A has events from t=10 (add_node + A->B) to t=50 (B->A). + # Edge events touching A: A->B @10, @20, @30; A->C @15, @25; A->A @25; B->A @50 => 7. + query = """ + { + graph(path: "g") { + node(name: "A") { + firstUpdate { timestamp } + lastUpdate { timestamp } + edgeHistoryCount + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "node": { + "firstUpdate": {"timestamp": 10}, + "lastUpdate": {"timestamp": 50}, + "edgeHistoryCount": 7, + } + } + }, + ) + ) + + # Windowed [15, 40): first event for A is at 15 (A->C), last is at 30 (A->B). + # Events touching A in window: A->B@20, A->B@30, A->C@15, A->C@25, A->A@25 => 5. + query = """ + { + graph(path: "g") { + window(start: 15, end: 40) { + node(name: "A") { + firstUpdate { timestamp } + lastUpdate { timestamp } + edgeHistoryCount + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "window": { + "node": { + "firstUpdate": {"timestamp": 15}, + "lastUpdate": {"timestamp": 30}, + "edgeHistoryCount": 5, + } + } + } + }, + ) + ) + + # Windowed [16, 40): firstUpdate must reflect the first event *inside* the + # window (t=20, A->B), not the window start (t=16). The previous case had + # an event at exactly the window start which would mask a bug returning + # the window start instead of the first real update. + # Events touching A in window: A->B@20, A->C@25, A->A@25, A->B@30 => 4. + query = """ + { + graph(path: "g") { + window(start: 16, end: 40) { + node(name: "A") { + firstUpdate { timestamp } + lastUpdate { timestamp } + edgeHistoryCount + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "window": { + "node": { + "firstUpdate": {"timestamp": 20}, + "lastUpdate": {"timestamp": 30}, + "edgeHistoryCount": 4, + } + } + } + }, + ) + ) + + # layer(layer2) on A: node events (add_node @ t=10) aren't layer-scoped so + # firstUpdate still sees t=10. Edge events on layer2 touching A are at 25, + # 30, 50 => lastUpdate=50, edgeHistoryCount=3. 
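+    # Cross-check of the count as a Python-side sketch (assuming `node.edges`
+    # and `edge.history()` carry over from the Python API):
+    #   a = graph.layer("layer2").node("A")
+    #   sum(len(e.history()) for e in a.edges)  # 3: A->B@30, A->C@25, B->A@50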
+ query = """ + { + graph(path: "g") { + layer(name: "layer2") { + node(name: "A") { + firstUpdate { timestamp } + lastUpdate { timestamp } + edgeHistoryCount + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "node": { + "firstUpdate": {"timestamp": 10}, + "lastUpdate": {"timestamp": 50}, + "edgeHistoryCount": 3, + } + } + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph) + + +def test_node_directed_degrees_and_edges(): + """`inDegree`, `outDegree`, `inEdges`, `outEdges` under base / window / layer.""" + graph = create_graph() + queries_and_expected = [] + + # Base on A: + # out-edges: A->B, A->C, A->A => outDegree=3 + # in-edges: A->A, B->A => inDegree=2 + query = """ + { + graph(path: "g") { + node(name: "A") { + inDegree + outDegree + inEdges { list { src { name } dst { name } } } + outEdges { list { src { name } dst { name } } } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "node": { + "inDegree": 2, + "outDegree": 3, + "inEdges": { + "list": [ + {"src": {"name": "A"}, "dst": {"name": "A"}}, + {"src": {"name": "B"}, "dst": {"name": "A"}}, + ] + }, + "outEdges": { + "list": [ + {"src": {"name": "A"}, "dst": {"name": "B"}}, + {"src": {"name": "A"}, "dst": {"name": "C"}}, + {"src": {"name": "A"}, "dst": {"name": "A"}}, + ] + }, + } + } + }, + ) + ) + + # layer(layer2): only A->B@30, A->C@25, B->A@50 touch A. + # outEdges(A): A->B, A->C (no A->A since self-loop is layer1) + # inEdges(A): B->A + query = """ + { + graph(path: "g") { + layer(name: "layer2") { + node(name: "A") { + inDegree + outDegree + inEdges { list { src { name } dst { name } } } + outEdges { list { src { name } dst { name } } } + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "layer": { + "node": { + "inDegree": 1, + "outDegree": 2, + "inEdges": { + "list": [ + {"src": {"name": "B"}, "dst": {"name": "A"}}, + ] + }, + "outEdges": { + "list": [ + {"src": {"name": "A"}, "dst": {"name": "B"}}, + {"src": {"name": "A"}, "dst": {"name": "C"}}, + ] + }, + } + } + } + }, + ) + ) + + # windowed [10, 30): A->A@25 + A->B@10, A->B@20 + A->C@15 => A has 3 out-edges (to A, B, C), 1 in-edge (A->A) + query = """ + { + graph(path: "g") { + window(start: 10, end: 30) { + node(name: "A") { + inDegree + outDegree + } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "window": { + "node": { + "inDegree": 1, + "outDegree": 3, + } + } + } + }, + ) + ) + + run_group_graphql_test(queries_and_expected, graph, sort_output=True) + + +def test_edge_update_times(): + """`firstUpdate` / `lastUpdate` on edges under base / window / layer.""" + graph = create_graph() + queries_and_expected = [] + + # A->B: base updates at 10, 20, 30. first=10, last=30. + query = """ + { + graph(path: "g") { + edge(src: "A", dst: "B") { + firstUpdate { timestamp } + lastUpdate { timestamp } + } + } + } + """ + queries_and_expected.append( + ( + query, + { + "graph": { + "edge": { + "firstUpdate": {"timestamp": 10}, + "lastUpdate": {"timestamp": 30}, + } + } + }, + ) + ) + + # layer(layer1) on A->B: updates at 10 and 20 only. 
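+    # i.e. the layer view narrows the edge's event set, so lastUpdate drops
+    # from 30 (a layer2 update) to 20. Python-side sketch (assuming the view
+    # API mirrors this): graph.layer("layer1").edge("A", "B").history()  # [10, 20]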
+    query = """
+    {
+        graph(path: "g") {
+            layer(name: "layer1") {
+                edge(src: "A", dst: "B") {
+                    firstUpdate { timestamp }
+                    lastUpdate { timestamp }
+                }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "layer": {
+                        "edge": {
+                            "firstUpdate": {"timestamp": 10},
+                            "lastUpdate": {"timestamp": 20},
+                        }
+                    }
+                }
+            },
+        )
+    )
+
+    # window [15, 25) on A->B: only update at 20.
+    query = """
+    {
+        graph(path: "g") {
+            window(start: 15, end: 25) {
+                edge(src: "A", dst: "B") {
+                    firstUpdate { timestamp }
+                    lastUpdate { timestamp }
+                }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "window": {
+                        "edge": {
+                            "firstUpdate": {"timestamp": 20},
+                            "lastUpdate": {"timestamp": 20},
+                        }
+                    }
+                }
+            },
+        )
+    )
+
+    run_group_graphql_test(queries_and_expected, graph)
+
+
+def test_edge_layers_and_explode():
+    """`layerNames`, `layerName`, `explode`, `explodeLayers`, `isSelfLoop`, `isValid`."""
+    graph = create_graph()
+    queries_and_expected = []
+
+    # A->B spans layer1 + layer2
+    query = """
+    {
+        graph(path: "g") {
+            edge(src: "A", dst: "B") {
+                layerNames
+                isSelfLoop
+                isValid
+                explode { list { src { name } dst { name } time { timestamp } layerName } }
+                explodeLayers { list { layerName } }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "edge": {
+                        "layerNames": ["layer1", "layer2"],
+                        "isSelfLoop": False,
+                        "isValid": True,
+                        "explode": {
+                            "list": [
+                                {
+                                    "src": {"name": "A"},
+                                    "dst": {"name": "B"},
+                                    "time": {"timestamp": 10},
+                                    "layerName": "layer1",
+                                },
+                                {
+                                    "src": {"name": "A"},
+                                    "dst": {"name": "B"},
+                                    "time": {"timestamp": 20},
+                                    "layerName": "layer1",
+                                },
+                                {
+                                    "src": {"name": "A"},
+                                    "dst": {"name": "B"},
+                                    "time": {"timestamp": 30},
+                                    "layerName": "layer2",
+                                },
+                            ]
+                        },
+                        "explodeLayers": {
+                            "list": [
+                                {"layerName": "layer1"},
+                                {"layerName": "layer2"},
+                            ]
+                        },
+                    }
+                }
+            },
+        )
+    )
+
+    # A->A self-loop (layer1 only)
+    query = """
+    {
+        graph(path: "g") {
+            edge(src: "A", dst: "A") {
+                isSelfLoop
+                layerNames
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "edge": {
+                        "isSelfLoop": True,
+                        "layerNames": ["layer1"],
+                    }
+                }
+            },
+        )
+    )
+
+    # `layerName` only works on edges that have been exploded (either fully or
+    # per-layer). Verified via explodeLayers on a multi-layer edge.
+    query = """
+    {
+        graph(path: "g") {
+            edge(src: "A", dst: "B") {
+                explodeLayers { list { layerName layerNames } }
+            }
+        }
+    }
+    """
+    queries_and_expected.append(
+        (
+            query,
+            {
+                "graph": {
+                    "edge": {
+                        "explodeLayers": {
+                            "list": [
+                                {"layerName": "layer1", "layerNames": ["layer1"]},
+                                {"layerName": "layer2", "layerNames": ["layer2"]},
+                            ]
+                        }
+                    }
+                }
+            },
+        )
+    )
+
+    run_group_graphql_test(queries_and_expected, graph)
+
+
+def test_edge_layer_name_errors_on_non_exploded_edge():
+    """`layerName` errors on any edge that hasn't been exploded — the
+    single-layer form is only available after `.explode()` or
+    `.explodeLayers()`."""
+    query = """
+    {
+        graph(path: "g") {
+            edge(src: "A", dst: "B") {
+                layerName
+            }
+        }
+    }
+    """
+    run_graphql_error_test_contains(
+        query, ["layer_name function is only available", "exploded"], create_graph()
+    )
+
+
+def test_edge_nbr_on_exploded_edge():
+    """`nbr` on the exploded form of an out-edge returns `dst`."""
+    graph = create_graph()
+    queries_and_expected = []
+
+    # Explode each of A's out-edges and ask for `nbr` on every exploded event.
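+    # Python-side sketch (an assumption that `out_edges` / `explode()` / `nbr`
+    # carry over with these names, not a statement of the API):
+    #   for e in graph.node("A").out_edges.explode():
+    #       e.nbr  # the endpoint opposite A; A itself for the self-loop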
+ query = """ + { + graph(path: "g") { + node(name: "A") { + outEdges { + list { + explode { list { nbr { name } } } + } + } + } + } + } + """ + # A has three out-edges (A->A, A->B, A->C). nbr of each exploded event is + # the "other" node — for A->A that's A (both ends), for A->B it's B, etc. + # A->A: 1 exploded event -> [A] + # A->B: 3 exploded events -> [B, B, B] + # A->C: 2 exploded events -> [C, C] + expected = { + "graph": { + "node": { + "outEdges": { + "list": [ + { + "explode": { + "list": [{"nbr": {"name": "A"}}], + } + }, + { + "explode": { + "list": [ + {"nbr": {"name": "B"}}, + {"nbr": {"name": "B"}}, + {"nbr": {"name": "B"}}, + ], + } + }, + { + "explode": { + "list": [ + {"nbr": {"name": "C"}}, + {"nbr": {"name": "C"}}, + ], + } + }, + ] + } + } + } + } + queries_and_expected.append((query, expected)) + run_group_graphql_test(queries_and_expected, graph, sort_output=True) diff --git a/python/tests/test_base_install/test_graphql/test_gql_node_id.py b/python/tests/test_base_install/test_graphql/test_gql_node_id.py new file mode 100644 index 0000000000..626662d6ff --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_node_id.py @@ -0,0 +1,318 @@ +"""Node-id arguments now accept the full `NodeId` shape: + +- A `String` (e.g. `"alice"`). +- A non-negative `Int` (e.g. `42`). + +These tests exercise both forms across the major lookup, mutation, and +view-transform surfaces, and confirm a graph indexed by integers can be +queried and mutated through the GraphQL server. +""" + +import json +import tempfile + +from raphtory import Graph +from raphtory.graphql import GraphServer + +from utils import PORT + + +def _query(server, q: str) -> dict: + response = server.get_client().query(q) + return json.loads(response) if isinstance(response, str) else response + + +def test_addnode_and_node_lookup_with_integer_ids(): + """A graph with integer node ids can be added and queried via the + GraphQL server. 
Raphtory enforces a single id type per graph, so this + test uses integers throughout.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + a: addNode(time: 1, name: 1) { success } + b: addNode(time: 2, name: 2) { success } + c: addNode(time: 3, name: 42) { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + hasInt: hasNode(name: 1) + hasOther: hasNode(name: 42) + hasMissingInt: hasNode(name: 999) + int_node: node(name: 1) { earliestTime { timestamp } } + int_node2: node(name: 42) { earliestTime { timestamp } } + } + } + """, + ) + graph = result["graph"] + assert graph["hasInt"] is True + assert graph["hasOther"] is True + assert graph["hasMissingInt"] is False + assert graph["int_node"]["earliestTime"]["timestamp"] == 1 + assert graph["int_node2"]["earliestTime"]["timestamp"] == 3 + + +def test_addedge_and_edge_lookup_with_integer_endpoints(): + """Edge mutations and lookups accept integer ids on src/dst.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + e1: addEdge(time: 10, src: 1, dst: 2) { success } + e2: addEdge(time: 20, src: 2, dst: 3) { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + hasIntEdge: hasEdge(src: 1, dst: 2) + hasIntEdge2: hasEdge(src: 2, dst: 3) + hasNoEdge: hasEdge(src: 1, dst: 3) + e1: edge(src: 1, dst: 2) { earliestTime { timestamp } } + e2: edge(src: 2, dst: 3) { earliestTime { timestamp } } + } + } + """, + ) + graph = result["graph"] + assert graph["hasIntEdge"] is True + assert graph["hasIntEdge2"] is True + assert graph["hasNoEdge"] is False + assert graph["e1"]["earliestTime"]["timestamp"] == 10 + assert graph["e2"]["earliestTime"]["timestamp"] == 20 + + +def test_view_transforms_with_integer_node_ids(): + """`subgraph`, `excludeNodes`, and `sharedNeighbours` accept integer + node ids.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + # Build a small integer-id graph: 1 → 2, 1 → 3, 4 → 2 (so 1 and 4 share neighbour 2). + client.query(""" + { + updateGraph(path: "g") { + e1: addEdge(time: 1, src: 1, dst: 2) { success } + e2: addEdge(time: 2, src: 1, dst: 3) { success } + e3: addEdge(time: 3, src: 4, dst: 2) { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + sub: subgraph(nodes: [1, 2]) { countNodes } + exclude: excludeNodes(nodes: [3]) { countNodes } + shared: sharedNeighbours(selectedNodes: [1, 4]) { id } + } + } + """, + ) + graph = result["graph"] + assert graph["sub"]["countNodes"] == 2 + assert graph["exclude"]["countNodes"] == 3 # 1, 2, 4 (3 removed) + # `1` and `4` both connect to `2`, so 2 is the shared neighbour. + # Integer-indexed graph → `id` comes back as a number. 
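+        # e.g. the raw response here is {"shared": [{"id": 2}]}; the string-id
+        # variant of this test below gets {"shared": [{"id": "bob"}]} instead.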
+ shared_ids = sorted(s["id"] for s in graph["shared"]) + assert shared_ids == [2] + + +def test_batch_addnodes_addedges_with_integer_ids(): + """`addNodes` and `addEdges` accept integer ids in `name`/`src`/`dst`.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + addNodes(nodes: [ + { name: 1, updates: [{ time: 1, properties: [{key: "v", value: {i64: 10}}] }] } + { name: 42, updates: [{ time: 2, properties: [{key: "v", value: {i64: 20}}] }] } + ]) + addEdges(edges: [ + { src: 1, dst: 2, updates: [{ time: 3, properties: [{key: "w", value: {f64: 1.5}}] }] } + { src: 2, dst: 42, updates: [{ time: 4, properties: [{key: "w", value: {f64: 2.5}}] }] } + ]) + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + n1: node(name: 1) { properties { temporal { get(key: "v") { values } } } } + n2: node(name: 42) { properties { temporal { get(key: "v") { values } } } } + e1: edge(src: 1, dst: 2) { properties { temporal { get(key: "w") { values } } } } + e2: edge(src: 2, dst: 42) { properties { temporal { get(key: "w") { values } } } } + } + } + """, + ) + graph = result["graph"] + assert graph["n1"]["properties"]["temporal"]["get"]["values"] == [10] + assert graph["n2"]["properties"]["temporal"]["get"]["values"] == [20] + assert graph["e1"]["properties"]["temporal"]["get"]["values"] == [1.5] + assert graph["e2"]["properties"]["temporal"]["get"]["values"] == [2.5] + + +def test_view_transforms_with_string_node_ids(): + """`subgraph`, `excludeNodes`, and `sharedNeighbours` accept string node ids.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + # alice → bob, alice → carol, dave → bob (alice and dave share bob). 
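+        # Same topology via the in-memory API used elsewhere in this suite:
+        #     g = Graph()
+        #     g.add_edge(1, "alice", "bob")
+        #     g.add_edge(2, "alice", "carol")
+        #     g.add_edge(3, "dave", "bob")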
+ client.query(""" + { + updateGraph(path: "g") { + e1: addEdge(time: 1, src: "alice", dst: "bob") { success } + e2: addEdge(time: 2, src: "alice", dst: "carol") { success } + e3: addEdge(time: 3, src: "dave", dst: "bob") { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + sub: subgraph(nodes: ["alice", "bob"]) { countNodes } + exclude: excludeNodes(nodes: ["carol"]) { countNodes } + shared: sharedNeighbours(selectedNodes: ["alice", "dave"]) { id } + } + } + """, + ) + graph = result["graph"] + assert graph["sub"]["countNodes"] == 2 + assert graph["exclude"]["countNodes"] == 3 # alice, bob, dave + shared_ids = sorted(s["id"] for s in graph["shared"]) + assert shared_ids == ["bob"] + + +def test_batch_addnodes_addedges_with_string_ids(): + """`addNodes` and `addEdges` accept string ids in `name`/`src`/`dst`.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + addNodes(nodes: [ + { name: "alice", updates: [{ time: 1, properties: [{key: "v", value: {i64: 10}}] }] } + { name: "bob", updates: [{ time: 2, properties: [{key: "v", value: {i64: 20}}] }] } + ]) + addEdges(edges: [ + { src: "alice", dst: "bob", updates: [{ time: 3, properties: [{key: "w", value: {f64: 1.5}}] }] } + { src: "bob", dst: "carol", updates: [{ time: 4, properties: [{key: "w", value: {f64: 2.5}}] }] } + ]) + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + n1: node(name: "alice") { properties { temporal { get(key: "v") { values } } } } + n2: node(name: "bob") { properties { temporal { get(key: "v") { values } } } } + e1: edge(src: "alice", dst: "bob") { properties { temporal { get(key: "w") { values } } } } + e2: edge(src: "bob", dst: "carol") { properties { temporal { get(key: "w") { values } } } } + } + } + """, + ) + graph = result["graph"] + assert graph["n1"]["properties"]["temporal"]["get"]["values"] == [10] + assert graph["n2"]["properties"]["temporal"]["get"]["values"] == [20] + assert graph["e1"]["properties"]["temporal"]["get"]["values"] == [1.5] + assert graph["e2"]["properties"]["temporal"]["get"]["values"] == [2.5] + + +def test_string_ids_remain_unchanged_for_existing_clients(): + """Existing clients passing string node ids continue to work without + modification — the schema change is wire-compatible for strings.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + client.query(""" + { + updateGraph(path: "g") { + addNode(time: 1, name: "alice") { success } + addEdge(time: 2, src: "alice", dst: "bob") { success } + } + } + """) + + result = _query( + server, + """ + { + graph(path: "g") { + node(name: "alice") { earliestTime { timestamp } } + edge(src: "alice", dst: "bob") { earliestTime { timestamp } } + } + } + """, + ) + assert result["graph"]["node"]["earliestTime"]["timestamp"] == 1 + assert result["graph"]["edge"]["earliestTime"]["timestamp"] == 2 + + +def test_negative_integer_rejected(): + """Schema rejects negative integers — `NodeId` only accepts non-negative.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "EVENT") + + try: + client.query(""" + { + updateGraph(path: "g") { + addNode(time: 1, name: -1) { success } + } + } + """) + 
raise AssertionError( + "Expected schema-level rejection for negative integer NodeId" + ) + except Exception as e: + assert "NodeId" in str(e) or "non-negative" in str( + e + ), f"Expected NodeId rejection, got: {e}" diff --git a/python/tests/test_base_install/test_graphql/test_gql_same_timestamp_writes.py b/python/tests/test_base_install/test_graphql/test_gql_same_timestamp_writes.py new file mode 100644 index 0000000000..d576a83a43 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_same_timestamp_writes.py @@ -0,0 +1,411 @@ +"""Every mutation that takes a `TimeInput` must auto-allocate a fresh +`event_id` when the user passes only a timestamp (no `eventId`). Two writes +at the same millisecond should therefore produce two distinct history +entries, not be deduplicated. + +Before the `GqlTimeInput` refactor, every Number/String time input was +coerced to `EventTime { t, event_id: 0 }`, so two same-ms writes shared the +same composite key and the second silently overwrote the first. + +These tests pin down the fix on every mutation surface that takes a time. +Each test checks both the timestamps in the history and the values, so a +regression that drops a timestamp entry (not just a value) would still +fail. +""" + +import json +import tempfile + +from raphtory.graphql import GraphServer + +from utils import PORT + + +def _query(server, q: str) -> dict: + response = server.get_client().query(q) + return json.loads(response) if isinstance(response, str) else response + + +def _new_event_graph(server): + server.get_client().new_graph("g", "EVENT") + + +def _temporal_history(get_block: dict) -> tuple[list, list]: + """Return (timestamps, values) for a `temporal.get(key:)` GraphQL block + that selected `history { list { timestamp } } values`.""" + timestamps = [h["timestamp"] for h in get_block["history"]["list"]] + return timestamps, get_block["values"] + + +# ----- Top-level graph properties -------------------------------------------- + + +def test_add_properties_same_timestamp_appends(): + """`addProperties` three times at the same ms → three history entries + with the same timestamp and distinct values.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + a: addProperties(t: 100, properties: [{key: "x", value: {i64: 1}}]) + b: addProperties(t: 100, properties: [{key: "x", value: {i64: 2}}]) + c: addProperties(t: 100, properties: [{key: "x", value: {i64: 3}}]) + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { properties { temporal { get(key: "x") { + history { list { timestamp } } + values + } } } } } + """, + ) + ts, values = _temporal_history(result["graph"]["properties"]["temporal"]["get"]) + assert ts == [100, 100, 100] + assert values == [1, 2, 3] + + +# ----- Single-call addNode / addEdge / createNode ---------------------------- + + +def test_add_node_same_timestamp_appends(): + """`addNode` twice at the same ms with different per-event properties → + both updates land. 
Verified on the node's `history` and on the + temporal property's `history`.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + a: addNode(time: 100, name: "n", properties: [{key: "v", value: {i64: 1}}]) { success } + b: addNode(time: 100, name: "n", properties: [{key: "v", value: {i64: 2}}]) { success } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + node(name: "n") { + history { list { timestamp } } + properties { temporal { get(key: "v") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + node = result["graph"]["node"] + node_ts = [h["timestamp"] for h in node["history"]["list"]] + assert node_ts == [100, 100] + ts, values = _temporal_history(node["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] + + +def test_add_edge_same_timestamp_appends(): + """`addEdge` twice at the same ms → both edge updates land. Verified on + the edge's `history` and on the temporal property's `history`.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + a: addEdge(time: 100, src: "a", dst: "b", properties: [{key: "w", value: {i64: 1}}]) { success } + b: addEdge(time: 100, src: "a", dst: "b", properties: [{key: "w", value: {i64: 2}}]) { success } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + edge(src: "a", dst: "b") { + history { list { timestamp } } + properties { temporal { get(key: "w") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + edge = result["graph"]["edge"] + edge_ts = [h["timestamp"] for h in edge["history"]["list"]] + assert edge_ts == [100, 100] + ts, values = _temporal_history(edge["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] + + +def test_create_node_then_add_node_same_timestamp_appends(): + """`createNode` followed by `addNode` at the same ms → both updates + land (createNode creates the node, addNode appends an update).""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + a: createNode(time: 100, name: "n", properties: [{key: "v", value: {i64: 1}}]) { success } + b: addNode(time: 100, name: "n", properties: [{key: "v", value: {i64: 2}}]) { success } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + node(name: "n") { + history { list { timestamp } } + properties { temporal { get(key: "v") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + node = result["graph"]["node"] + node_ts = [h["timestamp"] for h in node["history"]["list"]] + assert node_ts == [100, 100] + ts, values = _temporal_history(node["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] + + +# ----- MutableNode.addUpdates / MutableEdge.addUpdates ----------------------- + + +def test_mutable_node_add_updates_same_timestamp_appends(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { updateGraph(path: "g") { addNode(time: 0, name: "n") { success } } } + """) + server.get_client().query(""" + { + updateGraph(path: "g") { + node(name: "n") { + a: addUpdates(time: 100, properties: [{key: "v", value: {i64: 
1}}]) + b: addUpdates(time: 100, properties: [{key: "v", value: {i64: 2}}]) + } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + node(name: "n") { + history { list { timestamp } } + properties { temporal { get(key: "v") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + node = result["graph"]["node"] + node_ts = [h["timestamp"] for h in node["history"]["list"]] + # t=0 from the seed addNode plus two t=100 updates. + assert node_ts == [0, 100, 100] + ts, values = _temporal_history(node["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] + + +def test_mutable_edge_add_updates_same_timestamp_appends(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { updateGraph(path: "g") { addEdge(time: 0, src: "a", dst: "b") { success } } } + """) + server.get_client().query(""" + { + updateGraph(path: "g") { + edge(src: "a", dst: "b") { + x: addUpdates(time: 100, properties: [{key: "w", value: {i64: 1}}]) + y: addUpdates(time: 100, properties: [{key: "w", value: {i64: 2}}]) + } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + edge(src: "a", dst: "b") { + history { list { timestamp } } + properties { temporal { get(key: "w") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + edge = result["graph"]["edge"] + edge_ts = [h["timestamp"] for h in edge["history"]["list"]] + # t=0 from the seed addEdge plus two t=100 updates. + assert edge_ts == [0, 100, 100] + ts, values = _temporal_history(edge["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] + + +# ----- Batch addNodes / addEdges -------------------------------------------- + + +def test_add_nodes_batch_same_timestamp_appends(): + """A single batch `addNodes` with three updates at the same ms on the + same node should produce three history entries.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + addNodes(nodes: [ + { name: "n", updates: [ + { time: 100, properties: [{key: "v", value: {i64: 1}}] } + { time: 100, properties: [{key: "v", value: {i64: 2}}] } + { time: 100, properties: [{key: "v", value: {i64: 3}}] } + ] } + ]) + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + node(name: "n") { + history { list { timestamp } } + properties { temporal { get(key: "v") { + history { list { timestamp } } + values + } } } + } + } } + """, + ) + node = result["graph"]["node"] + node_ts = [h["timestamp"] for h in node["history"]["list"]] + assert node_ts == [100, 100, 100] + ts, values = _temporal_history(node["properties"]["temporal"]["get"]) + assert ts == [100, 100, 100] + assert values == [1, 2, 3] + + +def test_add_edges_batch_same_timestamp_appends(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + addEdges(edges: [ + { src: "a", dst: "b", updates: [ + { time: 100, properties: [{key: "w", value: {i64: 1}}] } + { time: 100, properties: [{key: "w", value: {i64: 2}}] } + { time: 100, properties: [{key: "w", value: {i64: 3}}] } + ] } + ]) + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + edge(src: "a", dst: "b") { + history { list { timestamp } } + properties { temporal { get(key: "w") { + history { 
list { timestamp } } + values + } } } + } + } } + """, + ) + edge = result["graph"]["edge"] + edge_ts = [h["timestamp"] for h in edge["history"]["list"]] + assert edge_ts == [100, 100, 100] + ts, values = _temporal_history(edge["properties"]["temporal"]["get"]) + assert ts == [100, 100, 100] + assert values == [1, 2, 3] + + +# ----- Persistent graph: deleteEdge ----------------------------------------- + + +def test_delete_edge_same_timestamp_appends(): + """Multiple `deleteEdge` calls at the same ms on a persistent graph all + land in the deletion history with the same timestamp.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + client = server.get_client() + client.new_graph("g", "PERSISTENT") + client.query(""" + { + updateGraph(path: "g") { + a: addEdge(time: 1, src: "a", dst: "b") { success } + d1: deleteEdge(time: 100, src: "a", dst: "b") { success } + d2: deleteEdge(time: 100, src: "a", dst: "b") { success } + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { + edge(src: "a", dst: "b") { deletions { list { timestamp } } } + } } + """, + ) + timestamps = [ + d["timestamp"] for d in result["graph"]["edge"]["deletions"]["list"] + ] + assert timestamps == [100, 100] + + +# ----- Object form pins event_id explicitly --------------------------------- + + +def test_object_time_input_distinct_event_ids_append(): + """Two writes at the same timestamp with distinct explicit event_ids + both land — the user-provided event_ids partition the events.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(PORT) as server: + _new_event_graph(server) + server.get_client().query(""" + { + updateGraph(path: "g") { + a: addProperties(t: {timestamp: 100, eventId: 0}, properties: [{key: "x", value: {i64: 1}}]) + b: addProperties(t: {timestamp: 100, eventId: 1}, properties: [{key: "x", value: {i64: 2}}]) + } + } + """) + result = _query( + server, + """ + { graph(path: "g") { properties { temporal { get(key: "x") { + history { list { timestamp } } + values + } } } } } + """, + ) + ts, values = _temporal_history(result["graph"]["properties"]["temporal"]["get"]) + assert ts == [100, 100] + assert values == [1, 2] diff --git a/python/tests/test_base_install/test_graphql/test_gql_temporal_aggregates.py b/python/tests/test_base_install/test_graphql/test_gql_temporal_aggregates.py new file mode 100644 index 0000000000..628745e629 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_gql_temporal_aggregates.py @@ -0,0 +1,597 @@ +import json +import tempfile + +from utils import PORT, run_group_graphql_test +from raphtory import Graph +from raphtory.graphql import GraphServer + + +def create_graph() -> Graph: + graph = Graph() + + # Bare node event so "A" exists at t=50 before any score updates + graph.add_node(50, "A") + # Node "A" with a numeric temporal property "score" at 4 timestamps + graph.add_node(100, "A", properties={"score": 10}) + graph.add_node(200, "A", properties={"score": 20}) + graph.add_node(300, "A", properties={"score": 30}) + graph.add_node(400, "A", properties={"score": 40}) + + # Edge "A -> B" with "weight" on two layers + graph.add_edge(100, "A", "B", properties={"weight": 1.0}, layer="layer1") + graph.add_edge(200, "A", "B", properties={"weight": 2.0}, layer="layer1") + graph.add_edge(300, "A", "B", properties={"weight": 3.0}, layer="layer2") + graph.add_edge(400, "A", "B", properties={"weight": 4.0}, layer="layer2") + + return graph + + +def test_node_temporal_aggregates(): + graph = create_graph() 
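+    # The median convention throughout is the *lower* median: for n sorted
+    # values it is sorted[(n - 1) // 2], e.g. index 1 -> 20 for [10, 20, 30, 40].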
+    queries_and_expected_outputs = []
+
+    # full timeline: score = [10, 20, 30, 40]
+    query = """
+    {
+      graph(path: "g") {
+        node(name: "A") {
+          properties {
+            temporal {
+              get(key: "score") {
+                sum
+                mean
+                average
+                count
+                min { time { timestamp } value }
+                max { time { timestamp } value }
+                median { time { timestamp } value }
+              }
+            }
+          }
+        }
+      }
+    }
+    """
+    expected_output = {
+        "graph": {
+            "node": {
+                "properties": {
+                    "temporal": {
+                        "get": {
+                            "sum": 100,
+                            "mean": 25.0,
+                            "average": 25.0,
+                            "count": 4,
+                            "min": {"time": {"timestamp": 100}, "value": 10},
+                            "max": {"time": {"timestamp": 400}, "value": 40},
+                            # lower median on even-length input: sorted[(4 - 1) // 2] = index 1
+                            "median": {"time": {"timestamp": 200}, "value": 20},
+                        }
+                    }
+                }
+            }
+        }
+    }
+    queries_and_expected_outputs.append((query, expected_output))
+
+    # windowed [150, 350): score = [20, 30]
+    query = """
+    {
+      graph(path: "g") {
+        window(start: 150, end: 350) {
+          node(name: "A") {
+            properties {
+              temporal {
+                get(key: "score") {
+                  sum
+                  mean
+                  count
+                  min { time { timestamp } value }
+                  max { time { timestamp } value }
+                  median { time { timestamp } value }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    """
+    expected_output = {
+        "graph": {
+            "window": {
+                "node": {
+                    "properties": {
+                        "temporal": {
+                            "get": {
+                                "sum": 50,
+                                "mean": 25.0,
+                                "count": 2,
+                                "min": {"time": {"timestamp": 200}, "value": 20},
+                                "max": {"time": {"timestamp": 300}, "value": 30},
+                                # lower median of [20, 30] => index 0
+                                "median": {"time": {"timestamp": 200}, "value": 20},
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    queries_and_expected_outputs.append((query, expected_output))
+
+    # window [40, 60) has no score updates: `get` still returns an object, but
+    # the aggregates are null and `count` is 0
+    query = """
+    {
+      graph(path: "g") {
+        window(start: 40, end: 60) {
+          node(name: "A") {
+            properties {
+              temporal {
+                get(key: "score") {
+                  sum
+                  mean
+                  count
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    """
+    expected_output = {
+        "graph": {
+            "window": {
+                "node": {
+                    "properties": {
+                        "temporal": {"get": {"sum": None, "mean": None, "count": 0}}
+                    }
+                }
+            }
+        }
+    }
+    queries_and_expected_outputs.append((query, expected_output))
+
+    run_group_graphql_test(queries_and_expected_outputs, graph)
+
+
+def create_non_numeric_graph() -> Graph:
+    graph = Graph()
+    # string-valued temporal property (insertion order != sorted order, so
+    # min/max/median land on interior timestamps)
+    graph.add_node(1, "A", properties={"name": "cherry"})
+    graph.add_node(2, "A", properties={"name": "apple"})
+    graph.add_node(3, "A", properties={"name": "banana"})
+    graph.add_node(4, "A", properties={"name": "date"})
+    # bool-valued temporal property, mixed so True/False appear multiple times
+    graph.add_node(1, "A", properties={"flag": True})
+    graph.add_node(2, "A", properties={"flag": False})
+    graph.add_node(3, "A", properties={"flag": True})
+    graph.add_node(4, "A", properties={"flag": False})
+    graph.add_node(5, "A", properties={"flag": True})
+    return graph
+
+
+def test_temporal_aggregates_on_non_numeric():
+    """Pin down semantics for aggregates on non-numeric temporal properties.
+
+    - Strings: `sum` concatenates (strings are additive), `min/max/median` work
+      lexicographically, `mean` is null (not f64-convertible).
+    - Bools: `sum`/`mean` are null (not additive, not f64-convertible), but
+      `min/max/median` work (False < True).
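+
+    Example: over ["cherry", "apple", "banana", "date"], string `sum` is the
+    insertion-order concatenation "cherryapplebananadate", while `min` is the
+    lexicographically smallest value "apple".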
+ """ + graph = create_non_numeric_graph() + queries_and_expected_outputs = [] + + # strings + query = """ + { + graph(path: "g") { + node(name: "A") { + properties { + temporal { + get(key: "name") { + sum + mean + average + count + min { time { timestamp } value } + max { time { timestamp } value } + median { time { timestamp } value } + } + } + } + } + } + } + """ + # timeline: cherry(t=1), apple(t=2), banana(t=3), date(t=4) + # sorted lex: apple(t=2) < banana(t=3) < cherry(t=1) < date(t=4) + expected_output = { + "graph": { + "node": { + "properties": { + "temporal": { + "get": { + # concatenation in insertion order + "sum": "cherryapplebananadate", + "mean": None, + "average": None, + "count": 4, + "min": {"time": {"timestamp": 2}, "value": "apple"}, + "max": {"time": {"timestamp": 4}, "value": "date"}, + # lower median of len=4 is sorted[(4-1)/2] = index 1 + "median": {"time": {"timestamp": 3}, "value": "banana"}, + } + } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + # bools + query = """ + { + graph(path: "g") { + node(name: "A") { + properties { + temporal { + get(key: "flag") { + sum + mean + average + count + min { time { timestamp } value } + max { time { timestamp } value } + median { time { timestamp } value } + } + } + } + } + } + } + """ + # timeline: True(t=1), False(t=2), True(t=3), False(t=4), True(t=5) + # min fold keeps first smaller value encountered => first False at t=2 + # max fold keeps first larger-or-equal value encountered => True at t=1 + # median: stable sort by value gives [False@2, False@4, True@1, True@3, True@5] + # lower median of len=5 => sorted[(5-1)/2] = index 2 = True at t=1 + expected_output = { + "graph": { + "node": { + "properties": { + "temporal": { + "get": { + "sum": None, + "mean": None, + "average": None, + "count": 5, + "min": {"time": {"timestamp": 2}, "value": False}, + "max": {"time": {"timestamp": 1}, "value": True}, + "median": {"time": {"timestamp": 1}, "value": True}, + } + } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + run_group_graphql_test(queries_and_expected_outputs, graph) + + +def test_edge_temporal_aggregates_across_layers(): + graph = create_graph() + queries_and_expected_outputs = [] + + # full (both layers): weight = [1.0, 2.0, 3.0, 4.0] + query = """ + { + graph(path: "g") { + edge(src: "A", dst: "B") { + properties { + temporal { + get(key: "weight") { + sum + mean + count + min { time { timestamp } value } + max { time { timestamp } value } + median { time { timestamp } value } + } + } + } + } + } + } + """ + expected_output = { + "graph": { + "edge": { + "properties": { + "temporal": { + "get": { + "sum": 10.0, + "mean": 2.5, + "count": 4, + "min": {"time": {"timestamp": 100}, "value": 1.0}, + "max": {"time": {"timestamp": 400}, "value": 4.0}, + "median": {"time": {"timestamp": 200}, "value": 2.0}, + } + } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + # layer1 only: weight = [1.0, 2.0] + query = """ + { + graph(path: "g") { + layer(name: "layer1") { + edge(src: "A", dst: "B") { + properties { + temporal { + get(key: "weight") { + sum + mean + count + min { time { timestamp } value } + max { time { timestamp } value } + } + } + } + } + } + } + } + """ + expected_output = { + "graph": { + "layer": { + "edge": { + "properties": { + "temporal": { + "get": { + "sum": 3.0, + "mean": 1.5, + "count": 2, + "min": {"time": {"timestamp": 100}, "value": 1.0}, + "max": {"time": {"timestamp": 200}, "value": 2.0}, + } + } 
+ } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + # layer2 only: weight = [3.0, 4.0] + query = """ + { + graph(path: "g") { + layer(name: "layer2") { + edge(src: "A", dst: "B") { + properties { + temporal { + get(key: "weight") { + sum + mean + count + min { time { timestamp } value } + max { time { timestamp } value } + } + } + } + } + } + } + } + """ + expected_output = { + "graph": { + "layer": { + "edge": { + "properties": { + "temporal": { + "get": { + "sum": 7.0, + "mean": 3.5, + "count": 2, + "min": {"time": {"timestamp": 300}, "value": 3.0}, + "max": {"time": {"timestamp": 400}, "value": 4.0}, + } + } + } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + # combined: windowed + layer-filtered (layer1, window [150, 400)) + # => weight = [2.0] + query = """ + { + graph(path: "g") { + layer(name: "layer1") { + window(start: 150, end: 400) { + edge(src: "A", dst: "B") { + properties { + temporal { + get(key: "weight") { + sum + mean + count + min { time { timestamp } value } + max { time { timestamp } value } + median { time { timestamp } value } + } + } + } + } + } + } + } + } + """ + expected_output = { + "graph": { + "layer": { + "window": { + "edge": { + "properties": { + "temporal": { + "get": { + "sum": 2.0, + "mean": 2.0, + "count": 1, + "min": {"time": {"timestamp": 200}, "value": 2.0}, + "max": {"time": {"timestamp": 200}, "value": 2.0}, + "median": { + "time": {"timestamp": 200}, + "value": 2.0, + }, + } + } + } + } + } + } + } + } + queries_and_expected_outputs.append((query, expected_output)) + + run_group_graphql_test(queries_and_expected_outputs, graph) + + +def _run_typed_accessors_cases(graph, cases): + """Run queries against a fresh server. + + `cases` is a list of `(query, expected, transform)` where `transform` is + applied to both the response and the expected value before comparison + (needed for fields like `unique` whose ordering is non-deterministic). 
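+
+    For example, `unique` over [10, 20, 30, 40] can arrive in any order, so
+    both actual and expected are sorted before the equality check.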
+ """ + tmp_work_dir = tempfile.mkdtemp() + with GraphServer(tmp_work_dir, create_index=True).start(PORT) as server: + client = server.get_client() + client.send_graph(path="g", graph=graph) + for query, expected, transform in cases: + response = client.query(query) + response_dict = ( + json.loads(response) if isinstance(response, str) else response + ) + actual = transform(response_dict) if transform else response_dict + expected_t = transform(expected) if transform else expected + assert actual == expected_t, f"Expected:\n{expected_t}\nGot:\n{actual}" + + +def _sort_unique(path): + """Returns a transform that sorts the `unique` list at the given dict path.""" + + def transform(d): + d = json.loads(json.dumps(d)) # deep copy + cur = d + for step in path: + cur = cur[step] + cur["unique"] = sorted(cur["unique"]) + return d + + return transform + + +def test_temporal_property_typed_accessors(): + """`values`, `latest`, `at`, and `unique` on `TemporalProperty` return + properly typed values (numbers stay numbers, bools stay bools, etc.).""" + graph = create_graph() # node "A" with score [10, 20, 30, 40] at t=100..400 + # bool property on the same node so we exercise non-numeric typing too + graph.add_node(100, "A", properties={"flag": True}) + graph.add_node(200, "A", properties={"flag": False}) + graph.add_node(300, "A", properties={"flag": True}) + + cases = [] + path = ["graph", "node", "properties", "temporal", "get"] + + # numeric values stay numeric; `at(t)` returns the latest value at-or-before t + query = """ + { + graph(path: "g") { + node(name: "A") { + properties { + temporal { + get(key: "score") { + values + latest + atEarly: at(t: 50) + atMid: at(t: 250) + atExact: at(t: 200) + atLate: at(t: 1000) + unique + } + } + } + } + } + } + """ + expected = { + "graph": { + "node": { + "properties": { + "temporal": { + "get": { + "values": [10, 20, 30, 40], + "latest": 40, + "atEarly": None, + "atMid": 20, + "atExact": 20, + "atLate": 40, + "unique": [10, 20, 30, 40], + } + } + } + } + } + } + cases.append((query, expected, _sort_unique(path))) + + # bools stay bools through values/latest/unique + query = """ + { + graph(path: "g") { + node(name: "A") { + properties { + temporal { + get(key: "flag") { + values + latest + unique + } + } + } + } + } + } + """ + expected = { + "graph": { + "node": { + "properties": { + "temporal": { + "get": { + "values": [True, False, True], + "latest": True, + "unique": [False, True], + } + } + } + } + } + } + cases.append((query, expected, _sort_unique(path))) + + _run_typed_accessors_cases(graph, cases) diff --git a/python/tests/test_base_install/test_graphql/test_graph_file_time_stats.py b/python/tests/test_base_install/test_graphql/test_graph_file_time_stats.py index 6157f48258..63e6de6dba 100644 --- a/python/tests/test_base_install/test_graphql/test_graph_file_time_stats.py +++ b/python/tests/test_base_install/test_graphql/test_graph_file_time_stats.py @@ -26,11 +26,13 @@ def test_graph_file_time_stats(): gql_last_opened_time = result["graph"]["lastOpened"] gql_last_updated_time = result["graph"]["lastUpdated"] - graph_file_path = os.path.join(graph_file_path, "graph") - file_stats = os.stat(graph_file_path) - created_time_fs = file_stats.st_ctime * 1000 - last_opened_time_fs = file_stats.st_atime * 1000 - last_updated_time_fs = file_stats.st_mtime * 1000 + raph_file_path = os.path.join(graph_file_path, ".raph") + raph_file_stats = os.stat(raph_file_path) + meta_file_path = os.path.join(graph_file_path, "data0", ".meta") + meta_file_stats = 
os.stat(meta_file_path) + created_time_fs = raph_file_stats.st_ctime * 1000 + last_opened_time_fs = meta_file_stats.st_atime * 1000 + last_updated_time_fs = meta_file_stats.st_mtime * 1000 assert ( abs(gql_created_time - created_time_fs) < 1000 diff --git a/python/tests/test_base_install/test_graphql/test_node_sorting.py b/python/tests/test_base_install/test_graphql/test_node_sorting.py index 9a091dd17c..52ebc6f07b 100644 --- a/python/tests/test_base_install/test_graphql/test_node_sorting.py +++ b/python/tests/test_base_install/test_graphql/test_node_sorting.py @@ -88,7 +88,7 @@ def test_graph_node_sort_by_nothing(graph): } } } - run_graphql_test(query, expected_output, graph) + run_graphql_test(query, expected_output, graph, sort_output=True) @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) @@ -245,12 +245,16 @@ def test_graph_nodes_sort_by_prop2(graph): @pytest.mark.parametrize("graph", [EVENT_GRAPH, PERSISTENT_GRAPH]) def test_graph_nodes_sort_by_prop3(graph): query = """ - { + query { graph(path: "g") { nodes { - sorted(sortBys: [{property: "prop3"}]) { + sorted(sortBys: [{ property: "prop3" }]) { list { - id + properties { + get(key: "prop3") { + value + } + } } } } @@ -260,7 +264,14 @@ def test_graph_nodes_sort_by_prop3(graph): expected_output = { "graph": { "nodes": { - "sorted": {"list": [{"id": "c"}, {"id": "a"}, {"id": "b"}, {"id": "d"}]} + "sorted": { + "list": [ + {"properties": {"get": {"value": "ayz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + {"properties": {"get": {"value": "xyz123"}}}, + ] + } } } } diff --git a/python/tests/test_base_install/test_graphql/test_rolling_expanding.py b/python/tests/test_base_install/test_graphql/test_rolling_expanding.py index 62a9b82a2c..1ae7a29005 100644 --- a/python/tests/test_base_install/test_graphql/test_rolling_expanding.py +++ b/python/tests/test_base_install/test_graphql/test_rolling_expanding.py @@ -627,7 +627,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 1, "start": {"timestamp": 1}, "end": {"timestamp": 2}, @@ -638,7 +638,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 3}, @@ -649,7 +649,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 3}, "end": {"timestamp": 4}, @@ -660,7 +660,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 1, "start": {"timestamp": 4}, "end": {"timestamp": 5}, @@ -671,7 +671,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 0, "start": {"timestamp": 5}, "end": {"timestamp": 6}, @@ -685,7 +685,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 1, "start": {"timestamp": 4}, "end": {"timestamp": 5}, @@ -696,7 +696,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 0, "start": {"timestamp": 5}, "end": {"timestamp": 6}, @@ -712,7 +712,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 3}, @@ -724,7 +724,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 4}, @@ -736,7 +736,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 5}, @@ -748,7 +748,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 6}, @@ -763,7 +763,7 @@ def test_nodes(): { 
"page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 5}, @@ -775,7 +775,7 @@ def test_nodes(): { "page": [ { - "id": "1", + "id": 1, "degree": 2, "start": {"timestamp": 2}, "end": {"timestamp": 6}, @@ -890,7 +890,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 1}, "end": {"timestamp": 2}, @@ -901,7 +901,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 3}, @@ -912,7 +912,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 3}, "end": {"timestamp": 4}, @@ -923,7 +923,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 0, "start": {"timestamp": 4}, "end": {"timestamp": 5}, @@ -934,7 +934,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 0, "start": {"timestamp": 5}, "end": {"timestamp": 6}, @@ -948,7 +948,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 0, "start": {"timestamp": 4}, "end": {"timestamp": 5}, @@ -959,7 +959,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 0, "start": {"timestamp": 5}, "end": {"timestamp": 6}, @@ -975,7 +975,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 3}, @@ -987,7 +987,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 4}, @@ -999,7 +999,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 5}, @@ -1011,7 +1011,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 6}, @@ -1026,7 +1026,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 5}, @@ -1038,7 +1038,7 @@ def test_path(): { "page": [ { - "id": "2", + "id": 2, "degree": 1, "start": {"timestamp": 2}, "end": {"timestamp": 6}, @@ -1340,7 +1340,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 1}, "end": {"timestamp": 2}, "earliestTime": {"timestamp": 1}, @@ -1350,7 +1350,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 3}, "earliestTime": {"timestamp": 2}, @@ -1360,7 +1360,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 3}, "end": {"timestamp": 4}, "earliestTime": {"timestamp": 3}, @@ -1370,7 +1370,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 4}, "end": {"timestamp": 5}, "earliestTime": {"timestamp": None}, @@ -1380,7 +1380,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 5}, "end": {"timestamp": 6}, "earliestTime": {"timestamp": None}, @@ -1393,7 +1393,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 4}, "end": {"timestamp": 5}, "earliestTime": {"timestamp": None}, @@ -1403,7 +1403,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 5}, "end": {"timestamp": 6}, "earliestTime": {"timestamp": None}, @@ -1418,7 +1418,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 3}, "earliestTime": {"timestamp": 2}, @@ -1429,7 +1429,7 @@ def test_edges(): { "page": [ { - "id": 
["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 4}, "earliestTime": {"timestamp": 2}, @@ -1440,7 +1440,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 5}, "earliestTime": {"timestamp": 2}, @@ -1451,7 +1451,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 6}, "earliestTime": {"timestamp": 2}, @@ -1465,7 +1465,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 5}, "earliestTime": {"timestamp": 2}, @@ -1476,7 +1476,7 @@ def test_edges(): { "page": [ { - "id": ["1", "2"], + "id": [1, 2], "start": {"timestamp": 2}, "end": {"timestamp": 6}, "earliestTime": {"timestamp": 2}, diff --git a/python/tests/test_base_install/test_graphql/test_server_flags.py b/python/tests/test_base_install/test_graphql/test_server_flags.py new file mode 100644 index 0000000000..1c80de56f1 --- /dev/null +++ b/python/tests/test_base_install/test_graphql/test_server_flags.py @@ -0,0 +1,381 @@ +import json +import tempfile +import urllib.error +import urllib.request + +import pytest +from raphtory import Graph +from raphtory.graphql import GraphServer, RaphtoryClient + +SERVER_URL = "http://localhost:1736" + + +def batch_query(body): + """POST a raw JSON body (needed for batch requests — the client only sends single queries).""" + data = json.dumps(body).encode("utf-8") + req = urllib.request.Request( + SERVER_URL + "/", + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req) as resp: + return resp.status, json.loads(resp.read()) + except urllib.error.HTTPError as e: + raw = e.read() + try: + return e.code, json.loads(raw) + except ValueError: + return e.code, raw.decode("utf-8", errors="replace") + + +def make_graph(client, path="g"): + g = Graph() + g.add_edge(1, "ben", "hamza") + g.add_edge(2, "lucas", "hamza") + g.add_edge(3, "ben", "lucas") + client.send_graph(path, g, overwrite=True) + + +def test_introspection_enabled_by_default(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir).start(): + client = RaphtoryClient(SERVER_URL) + result = client.query("{ __schema { queryType { name } } }") + assert result["__schema"]["queryType"]["name"] + + +def test_disable_introspection(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, disable_introspection=True).start(): + client = RaphtoryClient(SERVER_URL) + client.query("{ version }") + + with pytest.raises(Exception) as excinfo: + client.query("{ __schema { queryType { name } } }") + msg = str(excinfo.value) + assert "Unknown field" in msg and "__schema" in msg + + +def test_max_query_depth(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_query_depth=3).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + + client.query('{ graph(path: "g") { created } }') + + with pytest.raises(Exception) as excinfo: + client.query( + '{ graph(path: "g") { nodes { page(limit: 5) { edges { page(limit: 5) { src { name } } } } } } }' + ) + assert "Query is nested too deep." 
in str(excinfo.value) + + +def test_max_query_complexity(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_query_complexity=3).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + + client.query("{ version }") + + with pytest.raises(Exception) as excinfo: + client.query( + '{ graph(path: "g") { nodes { page(limit: 5) { name id earliestTime latestTime } } } }' + ) + assert "Query is too complex." in str(excinfo.value) + + +# (field path, query) pairs covering every list-returning resolver: +# GqlCollection, GqlNodes, GqlEdges, GqlPathFromNode, GqlHistory, GqlHistoryTimestamp, +# GqlHistoryDateTime, GqlHistoryEventId, GqlIntervals, and the six WindowSet types. +LIST_QUERIES = [ + ("collection (namespaces)", "{ namespaces { list { path } } }"), + ("GqlNodes", '{ graph(path: "g") { nodes { list { name } } } }'), + ("GqlNodes.ids", '{ graph(path: "g") { nodes { ids } } }'), + ("GqlEdges", '{ graph(path: "g") { edges { list { src { name } } } } }'), + ( + "GqlPathFromNode", + '{ graph(path: "g") { node(name: "ben") { neighbours { list { name } } } } }', + ), + ( + "GqlPathFromNode.ids", + '{ graph(path: "g") { node(name: "ben") { neighbours { ids } } } }', + ), + ( + "GqlHistory", + '{ graph(path: "g") { node(name: "ben") { history { list { timestamp } } } } }', + ), + ( + "GqlHistoryTimestamp", + '{ graph(path: "g") { node(name: "ben") { history { timestamps { list } } } } }', + ), + ( + "GqlHistoryDateTime", + '{ graph(path: "g") { node(name: "ben") { history { datetimes { list } } } } }', + ), + ( + "GqlHistoryEventId", + '{ graph(path: "g") { node(name: "ben") { history { eventId { list } } } } }', + ), + ( + "GqlIntervals", + '{ graph(path: "g") { node(name: "ben") { history { intervals { list } } } } }', + ), + ( + "GqlHistory.listRev", + '{ graph(path: "g") { node(name: "ben") { history { listRev { timestamp } } } } }', + ), + ( + "GqlHistoryTimestamp.listRev", + '{ graph(path: "g") { node(name: "ben") { history { timestamps { listRev } } } } }', + ), + ( + "GqlHistoryDateTime.listRev", + '{ graph(path: "g") { node(name: "ben") { history { datetimes { listRev } } } } }', + ), + ( + "GqlHistoryEventId.listRev", + '{ graph(path: "g") { node(name: "ben") { history { eventId { listRev } } } } }', + ), + ( + "GqlIntervals.listRev", + '{ graph(path: "g") { node(name: "ben") { history { intervals { listRev } } } } }', + ), + ( + "GqlGraphWindowSet", + '{ graph(path: "g") { rolling(window: {epoch: 1}) { list { earliestTime { timestamp } } } } }', + ), + ( + "GqlNodeWindowSet", + '{ graph(path: "g") { node(name: "ben") { rolling(window: {epoch: 1}) { list { name } } } } }', + ), + ( + "GqlNodesWindowSet", + '{ graph(path: "g") { nodes { rolling(window: {epoch: 1}) { list { count } } } } }', + ), + ( + "GqlPathFromNodeWindowSet", + '{ graph(path: "g") { node(name: "ben") { neighbours { rolling(window: {epoch: 1}) { list { count } } } } } }', + ), + ( + "GqlEdgeWindowSet", + '{ graph(path: "g") { edge(src: "ben", dst: "hamza") { rolling(window: {epoch: 1}) { list { src { name } } } } } }', + ), + ( + "GqlEdgesWindowSet", + '{ graph(path: "g") { edges { rolling(window: {epoch: 1}) { list { count } } } } }', + ), +] + +# Same resolvers reached via `page(limit: 50)` — chosen so we can compare against a small +# `max_page_size` limit and trigger the exceeded-size error. 
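+# With the server started as GraphServer(work_dir, max_page_size=2), every
+# query here should be rejected with:
+#   "page limit 50 exceeds the maximum allowed page size 2"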
+PAGE_QUERIES = [ + ("collection (namespaces)", "{ namespaces { page(limit: 50) { path } } }"), + ("GqlNodes", '{ graph(path: "g") { nodes { page(limit: 50) { name } } } }'), + ( + "GqlEdges", + '{ graph(path: "g") { edges { page(limit: 50) { src { name } } } } }', + ), + ( + "GqlPathFromNode", + '{ graph(path: "g") { node(name: "ben") { neighbours { page(limit: 50) { name } } } } }', + ), + ( + "GqlHistory", + '{ graph(path: "g") { node(name: "ben") { history { page(limit: 50) { timestamp } } } } }', + ), + ( + "GqlHistoryTimestamp", + '{ graph(path: "g") { node(name: "ben") { history { timestamps { page(limit: 50) } } } } }', + ), + ( + "GqlHistoryDateTime", + '{ graph(path: "g") { node(name: "ben") { history { datetimes { page(limit: 50) } } } } }', + ), + ( + "GqlHistoryEventId", + '{ graph(path: "g") { node(name: "ben") { history { eventId { page(limit: 50) } } } } }', + ), + ( + "GqlIntervals", + '{ graph(path: "g") { node(name: "ben") { history { intervals { page(limit: 50) } } } } }', + ), + ( + "GqlHistory.pageRev", + '{ graph(path: "g") { node(name: "ben") { history { pageRev(limit: 50) { timestamp } } } } }', + ), + ( + "GqlHistoryTimestamp.pageRev", + '{ graph(path: "g") { node(name: "ben") { history { timestamps { pageRev(limit: 50) } } } } }', + ), + ( + "GqlHistoryDateTime.pageRev", + '{ graph(path: "g") { node(name: "ben") { history { datetimes { pageRev(limit: 50) } } } } }', + ), + ( + "GqlHistoryEventId.pageRev", + '{ graph(path: "g") { node(name: "ben") { history { eventId { pageRev(limit: 50) } } } } }', + ), + ( + "GqlIntervals.pageRev", + '{ graph(path: "g") { node(name: "ben") { history { intervals { pageRev(limit: 50) } } } } }', + ), + ( + "GqlGraphWindowSet", + '{ graph(path: "g") { rolling(window: {epoch: 1}) { page(limit: 50) { earliestTime { timestamp } } } } }', + ), + ( + "GqlNodeWindowSet", + '{ graph(path: "g") { node(name: "ben") { rolling(window: {epoch: 1}) { page(limit: 50) { name } } } } }', + ), + ( + "GqlNodesWindowSet", + '{ graph(path: "g") { nodes { rolling(window: {epoch: 1}) { page(limit: 50) { count } } } } }', + ), + ( + "GqlPathFromNodeWindowSet", + '{ graph(path: "g") { node(name: "ben") { neighbours { rolling(window: {epoch: 1}) { page(limit: 50) { count } } } } } }', + ), + ( + "GqlEdgeWindowSet", + '{ graph(path: "g") { edge(src: "ben", dst: "hamza") { rolling(window: {epoch: 1}) { page(limit: 50) { src { name } } } } } }', + ), + ( + "GqlEdgesWindowSet", + '{ graph(path: "g") { edges { rolling(window: {epoch: 1}) { page(limit: 50) { count } } } } }', + ), +] + + +def test_disable_lists_all_resolvers(): + """Every `list` endpoint across every paginated type rejects with the same error.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, disable_lists=True).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + + for name, query in LIST_QUERIES: + with pytest.raises(Exception) as excinfo: + client.query(query) + assert ( + "Bulk list endpoints are disabled on this server. Use `page` instead." 
+ in str(excinfo.value) + ), f"{name} did not reject with the expected error: {excinfo.value}" + + +def test_disable_lists_page_still_works(): + """Even with `disable_lists=True`, `page` queries still succeed.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, disable_lists=True).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + result = client.query( + '{ graph(path: "g") { nodes { page(limit: 10) { name } } } }' + ) + assert len(result["graph"]["nodes"]["page"]) == 3 + + +def test_max_page_size_all_resolvers(): + """Every `page` endpoint across every paginated type enforces max_page_size.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_page_size=2).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + + for name, query in PAGE_QUERIES: + with pytest.raises(Exception) as excinfo: + client.query(query) + assert "page limit 50 exceeds the maximum allowed page size 2" in str( + excinfo.value + ), f"{name} did not reject with the expected error: {excinfo.value}" + + +def test_max_page_size_under_cap_works(): + """Pages at or below max_page_size still succeed.""" + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_page_size=2).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + result = client.query( + '{ graph(path: "g") { nodes { page(limit: 2) { name } } } }' + ) + assert len(result["graph"]["nodes"]["page"]) == 2 + + +def test_disable_batching(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, disable_batching=True).start(): + RaphtoryClient(SERVER_URL).query("{ version }") + + status, body = batch_query([{"query": "{ version }"}, {"query": "{ version }"}]) + assert status == 400 + assert "Query batching is disabled on this server" in str(body) + + +def test_max_batch_size(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_batch_size=2).start(): + status, body = batch_query([{"query": "{ version }"}] * 2) + assert status == 200 + assert isinstance(body, list) and len(body) == 2 + + status, body = batch_query([{"query": "{ version }"}] * 3) + assert status == 400 + assert "Batch size 3 exceeds the maximum allowed 2" in str(body) + + +def test_max_recursive_depth(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_recursive_depth=2).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + + # depth 2: { graph { created } } — root selection set is depth 0, graph{...} pushes to 1 + client.query('{ graph(path: "g") { created } }') + + with pytest.raises(Exception) as excinfo: + client.query('{ graph(path: "g") { nodes { page(limit: 1) { name } } } }') + assert "recursion depth of the query cannot be greater than `2`" in str( + excinfo.value + ) + + +def test_max_directives_per_field(): + work_dir = tempfile.mkdtemp() + with GraphServer(work_dir, max_directives_per_field=1).start(): + client = RaphtoryClient(SERVER_URL) + + # 1 directive — allowed + client.query("{ version @skip(if: false) }") + + # 2 directives on one field — rejected + with pytest.raises(Exception) as excinfo: + client.query("{ version @skip(if: false) @include(if: true) }") + assert ( + "number of directives on the field `version` cannot be greater than `1`" + in str(excinfo.value) + ) + + +# heavy_query_limit and exclusive_writes are concurrency knobs. Their effects only show +# up under parallel load (semaphore-parked queries, write/read serialization), and +# timing-based tests are flaky in CI. 
This smoke test at least verifies the flags are +# accepted and normal queries still pass through. +def test_concurrency_flags_smoke(): + work_dir = tempfile.mkdtemp() + with GraphServer( + work_dir, + heavy_query_limit=4, + exclusive_writes=True, + ).start(): + client = RaphtoryClient(SERVER_URL) + make_graph(client) + # Read path: works under exclusive_writes's read lock. + assert client.query('{ graph(path: "g") { nodes { count } } }') + # Heavy traversal: goes through the semaphore. + assert client.query( + '{ graph(path: "g") { nodes { page(limit: 10) { neighbours { page(limit: 10) { name } } } } } }' + ) diff --git a/python/tests/test_base_install/test_graphql/update_graph/test_batch_updates.py b/python/tests/test_base_install/test_graphql/update_graph/test_batch_updates.py index bc7e9b5ec4..35988b24a3 100644 --- a/python/tests/test_base_install/test_graphql/update_graph/test_batch_updates.py +++ b/python/tests/test_base_install/test_graphql/update_graph/test_batch_updates.py @@ -1,9 +1,8 @@ import tempfile from datetime import datetime, timezone from typing import List -from dateutil import parser from numpy.testing import assert_equal as check_arr -from utils import assert_set_eq, assert_has_metadata +from utils import assert_set_eq, assert_has_metadata, truncate_dt_to_ms from raphtory.graphql import ( GraphServer, RaphtoryClient, @@ -15,8 +14,8 @@ def make_props(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah", "prop_float": 2.0, @@ -41,8 +40,8 @@ def make_props(): def make_props2(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah2", "prop_float": 3.0, diff --git a/python/tests/test_base_install/test_graphql/update_graph/test_edge_updates.py b/python/tests/test_base_install/test_graphql/update_graph/test_edge_updates.py index 731e2a6c60..f7c0181542 100644 --- a/python/tests/test_base_install/test_graphql/update_graph/test_edge_updates.py +++ b/python/tests/test_base_install/test_graphql/update_graph/test_edge_updates.py @@ -1,14 +1,14 @@ import tempfile from datetime import datetime, timezone import pytest -from utils import assert_has_metadata, assert_has_properties +from utils import assert_has_metadata, assert_has_properties, truncate_dt_to_ms from raphtory.graphql import GraphServer, RaphtoryClient from numpy.testing import assert_equal as check_arr def make_props(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah", "prop_float": 2.0, @@ -33,8 +33,8 @@ def make_props(): def make_props2(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah2", "prop_float": 3.0, @@ -95,7 +95,7 @@ def test_add_metadata(): with pytest.raises(Exception) as excinfo: rg.edge("ben", "hamza").add_metadata({"prop_float": 3.0}) - assert "Attempted to change value of metadata" in str(excinfo.value) + assert "Cannot set previous value" in str(excinfo.value) def test_update_metadata(): 
@@ -139,5 +139,5 @@ def test_delete(): edge = rg.add_edge(1, "ben", "lucas", layer="colleagues") edge.delete(2, layer="colleagues") g = client.receive_graph("path/to/persistent_graph") - assert g.edge("ben", "hamza").deletions == [(2, 1)] - assert g.edge("ben", "lucas").deletions == [(2, 3)] + assert g.edge("ben", "hamza").deletions.t == [2] + assert g.edge("ben", "lucas").deletions.t == [2] diff --git a/python/tests/test_base_install/test_graphql/update_graph/test_graph_updates.py b/python/tests/test_base_install/test_graphql/update_graph/test_graph_updates.py index 36f7bdd5e9..4ca5df2c6e 100644 --- a/python/tests/test_base_install/test_graphql/update_graph/test_graph_updates.py +++ b/python/tests/test_base_install/test_graphql/update_graph/test_graph_updates.py @@ -4,12 +4,17 @@ from raphtory.graphql import GraphServer, RaphtoryClient from datetime import datetime, timezone from numpy.testing import assert_equal as check_arr -from utils import assert_set_eq, assert_has_metadata, assert_has_properties +from utils import ( + assert_set_eq, + assert_has_metadata, + assert_has_properties, + truncate_dt_to_ms, +) def make_props(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah", "prop_float": 2.0, @@ -46,7 +51,7 @@ def test_add_metadata(): with pytest.raises(Exception) as excinfo: rg.add_metadata({"prop_float": 3.0}) - assert "Attempted to change value of metadata" in str(excinfo.value) + assert "Cannot set previous value" in str(excinfo.value) def test_update_metadata(): @@ -73,20 +78,24 @@ def test_add_properties(): client.new_graph("path/to/event_graph", "EVENT") rg = client.remote_graph("path/to/event_graph") props = make_props() + # Both datetimes deliberately resolve to the same millisecond — Raphtory + # gives each write its own event_id, so 3 history entries should land + # (one at t=1 and two at the shared datetime ms). Using fixed values + # rather than `datetime.now()` so the test is deterministic. 
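+    # (Illustrative arithmetic: 2024-01-01T12:00:00Z is 1_704_110_400_000 ms
+    # since the Unix epoch, so both datetime writes below land on exactly
+    # that timestamp.)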
+ aware_dt = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + naive_dt = datetime(2024, 1, 1, 12, 0, 0) rg.add_property(1, props) - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() - rg.add_property(current_datetime, props) - rg.add_property(naive_datetime, props) + rg.add_property(aware_dt, props) + rg.add_property(naive_dt, props) g = client.receive_graph("path/to/event_graph") assert_has_properties(g, props) - localized_datetime = naive_datetime.replace(tzinfo=timezone.utc) + localized_dt = naive_dt.replace(tzinfo=timezone.utc) timestamps = sorted( [ 1, - int(current_datetime.timestamp() * 1000), - int(localized_datetime.timestamp() * 1000), + int(aware_dt.timestamp() * 1000), + int(localized_dt.timestamp() * 1000), ] ) @@ -142,7 +151,7 @@ def test_delete_edge(): rg.delete_edge(2, "ben", "hamza") g = client.receive_graph("path/to/event_graph") assert g.edge("ben", "hamza").history.t.collect() == [1] - assert g.edge("ben", "hamza").deletions == [(2, 1)] + assert g.edge("ben", "hamza").deletions.t.collect() == [2] client.new_graph("path/to/persistent_graph", "PERSISTENT") rg = client.remote_graph("path/to/persistent_graph") @@ -151,5 +160,5 @@ def test_delete_edge(): rg.add_edge(1, "ben", "lucas", layer="colleagues") rg.delete_edge(2, "ben", "lucas", layer="colleagues") g = client.receive_graph("path/to/persistent_graph") - assert g.edge("ben", "hamza").deletions == [(2, 1)] - assert g.edge("ben", "lucas").deletions == [(2, 3)] + assert g.edge("ben", "hamza").deletions.t.collect() == [2] + assert g.edge("ben", "lucas").deletions.t.collect() == [2] diff --git a/python/tests/test_base_install/test_graphql/update_graph/test_node_updates.py b/python/tests/test_base_install/test_graphql/update_graph/test_node_updates.py index 37ca6c0150..c394dd657d 100644 --- a/python/tests/test_base_install/test_graphql/update_graph/test_node_updates.py +++ b/python/tests/test_base_install/test_graphql/update_graph/test_node_updates.py @@ -3,14 +3,14 @@ import pytest from dateutil import parser -from utils import assert_has_properties, assert_has_metadata +from utils import assert_has_properties, assert_has_metadata, truncate_dt_to_ms from raphtory.graphql import GraphServer, RaphtoryClient from numpy.testing import assert_equal as check_arr def make_props(): - current_datetime = datetime.now(timezone.utc) - naive_datetime = datetime.now() + current_datetime = truncate_dt_to_ms(datetime.now(timezone.utc)) + naive_datetime = truncate_dt_to_ms(datetime.now()) return { "prop_string": "blah", "prop_float": 2.0, @@ -79,7 +79,7 @@ def test_add_metadata(): with pytest.raises(Exception) as excinfo: rg.node("ben").add_metadata({"prop_float": 3.0}) - assert "Attempted to change value of metadata" in str(excinfo.value) + assert "Cannot set previous value" in str(excinfo.value) def test_update_metadata(): diff --git a/python/tests/test_base_install/test_loaders/test_load_from_pandas.py b/python/tests/test_base_install/test_loaders/test_load_from_pandas.py index 47554b7fb7..87d838a8e6 100644 --- a/python/tests/test_base_install/test_loaders/test_load_from_pandas.py +++ b/python/tests/test_base_install/test_loaders/test_load_from_pandas.py @@ -272,6 +272,14 @@ def test_load_from_pandas_with_types(): } ) + nodes_meta_df = pd.DataFrame( + { + "id": [3, 4, 666, 6], + "name": ["Carol", "Dave", "Bowser", "Frank"], + "coins": [100, 150, 9999, 200], + } + ) + def assertions1(g): assert g.nodes.node_type == [ "Person", @@ -301,6 +309,19 @@ def assertions1(g): ) assertions1(g) + assert 
g.node(666) is None + assert g.node(3) is not None + g.load_node_metadata( + nodes_meta_df, + "id", + metadata=["name", "coins"], + ) + + assert g.node(666) is None + assert g.node(3) is not None + assert g.node(3).metadata.get("name") == "Carol" + assert g.node(3).metadata.get("coins") == 100 + g = PersistentGraph() g.load_nodes( nodes_df, @@ -907,7 +928,7 @@ def test_loading_list_as_properties(): properties=["marbles"], ) - assert g.edge(1, 2).properties["marbles"] == ["red"] + assert g.edge(1, 2).properties["marbles"].tolist() == ["red"] df = pd.DataFrame( { @@ -925,7 +946,7 @@ def test_loading_list_as_properties(): properties=["marbles"], ) - assert g.node(2).properties["marbles"] == ["blue"] + assert g.node(2).properties["marbles"].tolist() == ["blue"] def test_unparsable_props(): @@ -1737,3 +1758,285 @@ def test_load_edges_csv_c_engine_time_utf8(tmp_path): assert { v.id: v.properties["time"] for v in g.nodes if "time" in v.properties } == expected_node_time_props + + +def test_load_nodes_with_fixed_layer(): + """load_nodes(layer=...) assigns all node updates to the named layer.""" + nodes_df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6], + "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], + "time": [1, 2, 3, 4, 5, 6], + } + ) + + def assertions(g): + assert g.unique_layers == ["node_layer"] + assert set(g.layers(["node_layer"]).nodes.id) == {1, 2, 3, 4, 5, 6} + assert dict( + zip( + g.layers(["node_layer"]).nodes.id, + g.layers(["node_layer"]).nodes.properties.get("name"), + ) + ) == {1: "Alice", 2: "Bob", 3: "Carol", 4: "Dave", 5: "Eve", 6: "Frank"} + + g = Graph() + g.load_nodes( + data=nodes_df, + time="time", + id="id", + properties=["name"], + layer="node_layer", + ) + assertions(g) + + g = PersistentGraph() + g.load_nodes( + data=nodes_df, + time="time", + id="id", + properties=["name"], + layer="node_layer", + ) + assertions(g) + + +def test_load_nodes_with_layer_col(): + """load_nodes(layer_col=...) 
assigns each node update to its row's layer.""" + nodes_df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6], + "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], + "time": [1, 2, 3, 4, 5, 6], + "layer": ["layer A", "layer A", "layer B", "layer B", "layer C", "layer C"], + } + ) + + def assertions(g): + assert set(g.unique_layers) == {"layer A", "layer B", "layer C"} + assert set(g.layers(["layer A"]).nodes.id) == {1, 2} + assert set(g.layers(["layer B"]).nodes.id) == {3, 4} + assert set(g.layers(["layer C"]).nodes.id) == {5, 6} + assert set(g.layers(["layer A", "layer B"]).nodes.id) == {1, 2, 3, 4} + + g = Graph() + g.load_nodes( + data=nodes_df, + time="time", + id="id", + properties=["name"], + layer_col="layer", + ) + assertions(g) + + g = PersistentGraph() + g.load_nodes( + data=nodes_df, + time="time", + id="id", + properties=["name"], + layer_col="layer", + ) + assertions(g) + + +def test_load_nodes_layer_and_edges_layer_combined(): + """Nodes and edges can live in independent layers without cross-contamination.""" + edges_df = pd.DataFrame( + { + "src": [1, 2, 3], + "dst": [2, 3, 4], + "time": [1, 2, 3], + } + ) + nodes_df = pd.DataFrame( + { + "id": [1, 2, 3, 4], + "name": ["A", "B", "C", "D"], + "time": [1, 2, 3, 4], + } + ) + + def assertions(g): + assert set(g.unique_layers) == {"node_layer", "edge_layer"} + assert set(g.layers(["node_layer"]).nodes.id) == {1, 2, 3, 4} + assert list(g.layers(["node_layer"]).edges.id) == [] + assert set(g.layers(["edge_layer"]).edges.id) == {(1, 2), (2, 3), (3, 4)} + + g = Graph() + g.load_nodes( + data=nodes_df, time="time", id="id", properties=["name"], layer="node_layer" + ) + g.load_edges(edges_df, time="time", src="src", dst="dst", layer="edge_layer") + assertions(g) + + g = PersistentGraph() + g.load_nodes( + data=nodes_df, time="time", id="id", properties=["name"], layer="node_layer" + ) + g.load_edges(edges_df, time="time", src="src", dst="dst", layer="edge_layer") + assertions(g) + + +def test_load_nodes_invalid_layer_reference(): + """Querying a layer that was never loaded raises an error.""" + nodes_df = pd.DataFrame( + { + "id": [1, 2], + "time": [1, 2], + } + ) + g = Graph() + g.load_nodes(data=nodes_df, time="time", id="id", layer="real_layer") + + with pytest.raises(Exception, match="Invalid layer: nonexistent_layer"): + g.layers(["nonexistent_layer"]) + + +# --- Schema loading: Decimal & datetime PropTypes ----------------------------- + + +def test_load_nodes_with_decimal_schema_from_string_column(): + """`schema={"col": PropType.decimal(N)}` casts a string column to Decimal.""" + from decimal import Decimal + + df = pd.DataFrame( + { + "id": ["s1", "s2"], + "time": [1, 2], + "price": ["19.99", "0.50"], + } + ) + g = Graph() + g.load_nodes( + data=df, + id="id", + time="time", + properties=["price"], + schema={"price": PropType.decimal(2)}, + ) + assert g.node("s1").properties["price"] == Decimal("19.99") + assert g.node("s2").properties["price"] == Decimal("0.50") + + +def test_load_nodes_with_decimal_schema_from_arrow_decimal_column(): + """An Arrow Decimal128 column maps onto `PropType.decimal(scale)`.""" + from decimal import Decimal + + arr = pa.array( + [Decimal("1.23"), Decimal("4.56")], + type=pa.decimal128(precision=10, scale=2), + ) + df = pd.DataFrame( + { + "id": ["s1", "s2"], + "time": [1, 2], + "price": arr.to_pandas(), + } + ) + g = Graph() + g.load_nodes( + data=df, + id="id", + time="time", + properties=["price"], + schema={"price": PropType.decimal(2)}, + ) + assert g.node("s1").properties["price"] == 
Decimal("1.23") + assert g.node("s2").properties["price"] == Decimal("4.56") + + +def test_load_edges_with_decimal_schema(): + from decimal import Decimal + + df = pd.DataFrame( + { + "src": ["a", "b"], + "dst": ["b", "c"], + "time": [1, 2], + "weight": ["3.14", "2.71"], + } + ) + g = Graph() + g.load_edges( + df, + time="time", + src="src", + dst="dst", + properties=["weight"], + schema={"weight": PropType.decimal(2)}, + ) + assert g.edge("a", "b").properties["weight"] == Decimal("3.14") + assert g.edge("b", "c").properties["weight"] == Decimal("2.71") + + +def test_load_nodes_with_naive_datetime_schema(): + """`PropType.naive_datetime()` casts a string column to NDTime.""" + df = pd.DataFrame( + { + "id": ["s1", "s2"], + "time": [1, 2], + "ts": ["2024-06-01T12:00:00", "2024-06-02T13:30:00"], + } + ) + g = Graph() + g.load_nodes( + data=df, + id="id", + time="time", + properties=["ts"], + schema={"ts": PropType.naive_datetime()}, + ) + assert g.node("s1").properties["ts"] == datetime.datetime(2024, 6, 1, 12, 0, 0) + assert g.node("s2").properties["ts"] == datetime.datetime(2024, 6, 2, 13, 30, 0) + + +def test_load_nodes_with_aware_datetime_schema(): + """`PropType.datetime()` casts to DTime (timezone-aware UTC).""" + df = pd.DataFrame( + { + "id": ["s1", "s2"], + "time": [1, 2], + "ts": ["2024-06-01T12:00:00+00:00", "2024-06-02T13:30:00+00:00"], + } + ) + g = Graph() + g.load_nodes( + data=df, + id="id", + time="time", + properties=["ts"], + schema={"ts": PropType.datetime()}, + ) + assert g.node("s1").properties["ts"] == datetime.datetime( + 2024, 6, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ) + assert g.node("s2").properties["ts"] == datetime.datetime( + 2024, 6, 2, 13, 30, 0, tzinfo=datetime.timezone.utc + ) + + +def test_load_edges_with_datetime_schema(): + df = pd.DataFrame( + { + "src": ["a", "b"], + "dst": ["b", "c"], + "time": [1, 2], + "scheduled_at": [ + "2024-06-01T09:00:00+00:00", + "2024-06-02T17:00:00+00:00", + ], + } + ) + g = Graph() + g.load_edges( + df, + time="time", + src="src", + dst="dst", + properties=["scheduled_at"], + schema={"scheduled_at": PropType.datetime()}, + ) + assert g.edge("a", "b").properties["scheduled_at"] == datetime.datetime( + 2024, 6, 1, 9, 0, 0, tzinfo=datetime.timezone.utc + ) diff --git a/python/tests/test_base_install/test_loaders/test_load_from_parquet.py b/python/tests/test_base_install/test_loaders/test_load_from_parquet.py index 8956ab3d61..238f0a17fa 100644 --- a/python/tests/test_base_install/test_loaders/test_load_from_parquet.py +++ b/python/tests/test_base_install/test_loaders/test_load_from_parquet.py @@ -1,12 +1,12 @@ import datetime import os import re +import tempfile + +import pandas as pd import pyarrow as pa import pyarrow.parquet as pq import pytest -import tempfile -import pandas as pd - from raphtory import Graph, PersistentGraph @@ -68,7 +68,11 @@ def parquet_files(): ) ) - yield nodes_parquet_file_path, edges_parquet_file_path, edge_deletions_parquet_file_path + yield ( + nodes_parquet_file_path, + edges_parquet_file_path, + edge_deletions_parquet_file_path, + ) # Cleanup the temporary directory after tests dirname.cleanup() @@ -515,7 +519,7 @@ def test_edge_both_option_failures_parquet(parquet_files): g = Graph() with pytest.raises( Exception, - match=r"Failed to load graph: You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", + match=r"You cannot set ‘layer_name’ and ‘layer_col’ at the same time. 
Please pick one or the other.", ): g.load_edges( edges_parquet_file_path, @@ -528,7 +532,7 @@ def test_edge_both_option_failures_parquet(parquet_files): with pytest.raises( Exception, - match=r"Failed to load graph: You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", + match=r"You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", ): g.load_edge_metadata( edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" @@ -625,7 +629,7 @@ def test_edge_both_option_failures_parquet(parquet_files): g = PersistentGraph() with pytest.raises( Exception, - match=r"Failed to load graph: You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", + match=r"You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", ): g.load_edges( edges_parquet_file_path, @@ -638,7 +642,7 @@ def test_edge_both_option_failures_parquet(parquet_files): with pytest.raises( Exception, - match=r"Failed to load graph: You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", + match=r"You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", ): g.load_edge_metadata( edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" @@ -646,7 +650,7 @@ def test_edge_both_option_failures_parquet(parquet_files): with pytest.raises( Exception, - match=r"Failed to load graph: You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", + match=r"You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.", ): g.load_edge_deletions( edges_parquet_file_path, @@ -787,7 +791,7 @@ def test_node_both_option_failures_parquet(parquet_files): with pytest.raises( Exception, match=re.escape( - r"Failed to load graph: You cannot set ‘node_type_name’ and ‘node_type_col’ at the same time. Please pick one or the other." + r"You cannot set ‘node_type_name’ and ‘node_type_col’ at the same time. Please pick one or the other." ), ): g = Graph() @@ -802,7 +806,7 @@ def test_node_both_option_failures_parquet(parquet_files): with pytest.raises( Exception, match=re.escape( - r"Failed to load graph: You cannot set ‘node_type_name’ and ‘node_type_col’ at the same time. Please pick one or the other." + r"You cannot set ‘node_type_name’ and ‘node_type_col’ at the same time. Please pick one or the other." 
), ): g = Graph() diff --git a/python/tests/test_base_install/test_props.py b/python/tests/test_base_install/test_props.py index 76db5d61e7..5582dd162f 100644 --- a/python/tests/test_base_install/test_props.py +++ b/python/tests/test_base_install/test_props.py @@ -1,6 +1,7 @@ -from raphtory import Prop +from raphtory import Graph, Prop from utils import expect_unify_error, assert_in_all from decimal import Decimal +from datetime import datetime, timezone import pytest @@ -158,3 +159,128 @@ def test_map_with_nested_list_that_is_heterogeneous_rejected(): } ).dtype() ) + + +def test_aware_datetime(): + dt = datetime(2024, 6, 1, 12, 30, 45, tzinfo=timezone.utc) + p = Prop.aware_datetime(dt) + assert str(p.dtype()) == "DTime" + assert "2024-06-01" in repr(p) + + +def test_aware_datetime_treats_naive_as_utc(): + """Naive datetimes are accepted and interpreted as UTC, consistent with + how `EventTime` and other Raphtory time inputs handle them.""" + naive = datetime(2024, 6, 1, 12, 30, 45) + aware = datetime(2024, 6, 1, 12, 30, 45, tzinfo=timezone.utc) + assert Prop.aware_datetime(naive) == Prop.aware_datetime(aware) + + +def test_naive_datetime(): + dt = datetime(2024, 6, 1, 12, 30, 45) + p = Prop.naive_datetime(dt) + assert str(p.dtype()) == "NDTime" + assert "2024-06-01" in repr(p) + + +def test_decimal_from_string(): + p = Prop.decimal("1234.5678") + # Decimal stores scale; dtype reports it. + assert str(p.dtype()) == "Decimal { scale: 4 }" + + +def test_decimal_from_negative_string(): + p = Prop.decimal("-0.001") + assert str(p.dtype()) == "Decimal { scale: 3 }" + + +def test_decimal_from_string_zero_scale(): + p = Prop.decimal("42") + assert str(p.dtype()) == "Decimal { scale: 0 }" + + +def test_decimal_from_python_decimal(): + p = Prop.decimal(Decimal("99.99")) + assert str(p.dtype()) == "Decimal { scale: 2 }" + + +def test_decimal_from_python_decimal_high_precision(): + """`decimal.Decimal` preserves precision regardless of float limits.""" + p = Prop.decimal(Decimal("1.234567890123456789012345")) + assert str(p.dtype()) == "Decimal { scale: 24 }" + + +def test_decimal_from_int(): + p = Prop.decimal(7) + assert str(p.dtype()) == "Decimal { scale: 0 }" + + +def test_decimal_from_negative_int(): + p = Prop.decimal(-42) + assert str(p.dtype()) == "Decimal { scale: 0 }" + + +def test_decimal_from_large_int(): + p = Prop.decimal(2**62) + assert str(p.dtype()) == "Decimal { scale: 0 }" + + +def test_decimal_from_float(): + p = Prop.decimal(1.5) + assert "Decimal" in str(p.dtype()) + + +def test_decimal_from_negative_float(): + p = Prop.decimal(-3.25) + assert "Decimal" in str(p.dtype()) + + +def test_decimal_rejects_non_numeric_string(): + with pytest.raises(TypeError): + Prop.decimal("not a number") + + +def test_decimal_rejects_unsupported_type(): + with pytest.raises(TypeError): + Prop.decimal([1, 2, 3]) + + +def test_decimal_in_graph_roundtrips(): + """Decimal Props attach to graph entities and are readable back.""" + g = Graph() + g.add_node(1, "n", properties={"price": Prop.decimal("19.99")}) + val = g.node("n").properties.get("price") + assert val == Decimal("19.99") + + +def test_decimal_in_graph_from_int_then_read_back(): + g = Graph() + g.add_node(1, "n", properties={"count": Prop.decimal(42)}) + val = g.node("n").properties.get("count") + assert val == Decimal("42") + + +def test_decimal_in_graph_from_float_then_read_back(): + g = Graph() + g.add_node(1, "n", properties={"ratio": Prop.decimal(1.5)}) + val = g.node("n").properties.get("ratio") + assert val == Decimal("1.5") + + +def 
test_decimal_list_in_graph(): + """Lists of Decimal Props inherit a unified scale.""" + g = Graph() + g.add_node( + 1, + "n", + properties={"prices": Prop.list([Prop.decimal("1.25"), Prop.decimal("2.50")])}, + ) + vals = g.node("n").properties.get("prices") + assert vals == [Decimal("1.25"), Decimal("2.50")] + + +def test_decimal_list_rejects_mixed_scales(): + """Mixing decimal scales in a list errors at unification time.""" + expect_unify_error( + lambda: Prop.list([Prop.decimal("1.25"), Prop.decimal("2.5")]).dtype() + ) diff --git a/python/tests/test_export.py b/python/tests/test_export.py index 181cdef376..b212606c58 100644 --- a/python/tests/test_export.py +++ b/python/tests/test_export.py @@ -107,7 +107,7 @@ def test_graph_timestamp_list_properties(): ], ) - assert g.node("a")["array_column"] == [1, 2, 3] + assert g.node("a")["array_column"].tolist() == [1, 2, 3] assert g.node("a")["date_column_ms"] == df["date_column_ms"][0] assert g.node("a")["date_column_us"] == df["date_column_us"][0] diff --git a/python/tests/test_base_install/test_graph_benchmarks.py b/python/tests/test_graph_benchmarks.py similarity index 100% rename from python/tests/test_base_install/test_graph_benchmarks.py rename to python/tests/test_graph_benchmarks.py diff --git a/python/tests/test_permissions.py b/python/tests/test_permissions.py new file mode 100644 index 0000000000..43488acc9d --- /dev/null +++ b/python/tests/test_permissions.py @@ -0,0 +1,1735 @@ +import json +import os +import tempfile +import requests +import jwt +import pytest +from raphtory.graphql import GraphServer, RaphtoryClient, has_permissions_extension + +pytestmark = pytest.mark.skipif( + not has_permissions_extension(), + reason="raphtory-auth not compiled in (open-source build)", +) + +# Reuse the same key pair as test_auth.py +PUB_KEY = "MCowBQYDK2VwAyEADdrWr1kTLj+wSHlr45eneXmOjlHo3N1DjLIvDa2ozno=" +PRIVATE_KEY = """-----BEGIN PRIVATE KEY----- +MC4CAQAwBQYDK2VwBCIEIFzEcSO/duEjjX4qKxDVy4uLqfmiEIA6bEw1qiPyzTQg +-----END PRIVATE KEY-----""" + +RAPHTORY = "http://localhost:1736" + +ANALYST_JWT = jwt.encode( + {"access": "ro", "role": "analyst"}, PRIVATE_KEY, algorithm="EdDSA" +) +ANALYST_HEADERS = {"Authorization": f"Bearer {ANALYST_JWT}"} + +ADMIN_JWT = jwt.encode( + {"access": "rw", "role": "admin"}, PRIVATE_KEY, algorithm="EdDSA" +) +ADMIN_HEADERS = {"Authorization": f"Bearer {ADMIN_JWT}"} + +NO_ROLE_JWT = jwt.encode({"access": "ro"}, PRIVATE_KEY, algorithm="EdDSA") +NO_ROLE_HEADERS = {"Authorization": f"Bearer {NO_ROLE_JWT}"} + +QUERY_JIRA = """query { graph(path: "jira") { path } }""" +QUERY_ADMIN = """query { graph(path: "admin") { path } }""" +QUERY_NS_GRAPHS = """query { root { graphs { list { path } } } }""" +QUERY_NS_CHILDREN = """query { root { children { list { path } } } }""" +QUERY_META_JIRA = """query { graphMetadata(path: "jira") { path nodeCount } }""" +CREATE_JIRA = """mutation { newGraph(path:"jira", graphType:EVENT) }""" +CREATE_ADMIN = """mutation { newGraph(path:"admin", graphType:EVENT) }""" +CREATE_TEAM_JIRA = """mutation { newGraph(path:"team/jira", graphType:EVENT) }""" +CREATE_TEAM_CONFLUENCE = ( + """mutation { newGraph(path:"team/confluence", graphType:EVENT) }""" +) +CREATE_DEEP = """mutation { newGraph(path:"a/b/c", graphType:EVENT) }""" +QUERY_TEAM_JIRA = """query { graph(path: "team/jira") { path } }""" +QUERY_TEAM_GRAPHS = """query { namespace(path: "team") { graphs { list { path } } } }""" +QUERY_A_CHILDREN = """query { namespace(path: "a") { children { list { path } } } }""" + + +def gql(query: str, 
headers=None) -> dict: + h = headers if headers is not None else ADMIN_HEADERS + return requests.post(RAPHTORY, headers=h, data=json.dumps({"query": query})).json() + + +def create_role(role: str) -> None: + gql(f'mutation {{ permissions {{ createRole(name: "{role}") {{ success }} }} }}') + + +def grant_graph(role: str, path: str, permission: str) -> None: + gql( + f'mutation {{ permissions {{ grantGraph(role: "{role}", path: "{path}", permission: {permission}) {{ success }} }} }}' + ) + + +def grant_namespace(role: str, path: str, permission: str) -> None: + gql( + f'mutation {{ permissions {{ grantNamespace(role: "{role}", path: "{path}", permission: {permission}) {{ success }} }} }}' + ) + + +def revoke_graph(role: str, path: str) -> None: + gql( + f'mutation {{ permissions {{ revokeGraph(role: "{role}", path: "{path}") {{ success }} }} }}' + ) + + +def grant_graph_filtered_read_only(role: str, path: str, filter_gql: str) -> None: + """Call grantGraphFilteredReadOnly with a raw GQL filter fragment.""" + resp = gql( + f'mutation {{ permissions {{ grantGraphFilteredReadOnly(role: "{role}", path: "{path}", filter: {filter_gql}) {{ success }} }} }}' + ) + assert "errors" not in resp, f"grantGraphFilteredReadOnly failed: {resp}" + + +def make_server(work_dir: str): + """Create a GraphServer wired with a permissions store at {work_dir}/permissions.json.""" + return GraphServer( + work_dir, + auth_public_key=PUB_KEY, + permissions_store_path=os.path.join(work_dir, "permissions.json"), + ) + + +def test_analyst_can_access_permitted_graph(): + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + gql(CREATE_ADMIN) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "jira" + + +def test_analyst_cannot_access_denied_graph(): + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_ADMIN) + create_role("analyst") + grant_graph("analyst", "jira", "READ") # only jira, not admin + + # "admin" graph is silently null — analyst has no namespace INTROSPECT, so + # existence of "admin" is not revealed. + response = gql(QUERY_ADMIN, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_admin_can_access_all_graphs(): + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + gql(CREATE_ADMIN) + + for query in [QUERY_JIRA, QUERY_ADMIN]: + response = gql(query, headers=ADMIN_HEADERS) + assert "errors" not in response, response + + +def test_no_role_is_denied_when_policy_is_active(): + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_JIRA, headers=NO_ROLE_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_unknown_role_is_denied_when_policy_is_active(): + """JWT has a role claim but that role does not exist in the store → Denied. + + Distinct from test_no_role_is_denied_when_policy_is_active: here the JWT + does carry a role claim ('analyst'), but 'analyst' was never created in the + store. Both paths deny, but via different branches of the policy flowchart. 
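+
+    In sketch form: a request is allowed only if the JWT carries a role, that
+    role exists in the store, and the role holds a matching grant; every other
+    branch denies ('rw' admin tokens bypass the policy entirely; see
+    test_admin_bypasses_policy_for_reads below).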
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + # Make the store non-empty with a different role — but never create "analyst" + create_role("other_team") + + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) # JWT says role="analyst" + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_empty_store_denies_non_admin(): + """With an empty permissions store (no roles configured), non-admin users are denied.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_empty_store_allows_admin(): + """With an empty permissions store, admin (rw JWT) still gets full access.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + + response = gql(QUERY_JIRA, headers=ADMIN_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "jira" + + +def test_introspection_allowed_with_introspect_permission(): + """Namespace INTROSPECT makes graphs visible in listings but graph() is denied.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "INTROSPECT") + + # Namespace listing shows the graph as MetaGraph + response = gql(QUERY_TEAM_GRAPHS, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [g["path"] for g in response["data"]["namespace"]["graphs"]["list"]] + assert "team/jira" in paths + + # graph() resolver returns null — INTROSPECT does not grant data access + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_read_implies_introspect(): + """READ also shows the graph in namespace listings (implies INTROSPECT).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_NS_GRAPHS, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [g["path"] for g in response["data"]["root"]["graphs"]["list"]] + assert "jira" in paths + + +def test_permissions_update_via_mutation(): + """Granting access via mutation takes effect immediately.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + + # No grants yet — graph returns null (indistinguishable from "graph not found") + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + # Grant via mutation + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "jira" + + +def test_namespace_grant_does_not_cover_root_level_graphs(): + """Namespace grants only apply to graphs within that namespace; root-level graphs require explicit graph grants.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace( + "analyst", "team", "READ" + ) # covers team/jira but not root-level jira + + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert 
"errors" not in response, response + + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert ( + response["data"]["graph"] is None + ) # root-level graph not covered by namespace grant + + +# --- WRITE permission enforcement --- + +UPDATE_JIRA = """query { updateGraph(path: "jira") { addNode(time: 1, name: "test_node") { success } } }""" +CREATE_JIRA_NS = """mutation { newGraph(path:"team/jira", graphType:EVENT) }""" + + +def test_admin_bypasses_policy_for_reads(): + """'access':'rw' admin can read any graph even without a role entry in the store.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + # Policy is active (analyst role exists) but admin has no role entry + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_JIRA, headers=ADMIN_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "jira" + + +def test_analyst_can_write_with_write_grant(): + """'access':'ro' user with WRITE grant on a specific graph can call updateGraph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "WRITE") + + response = gql(UPDATE_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + + +def test_analyst_cannot_write_without_write_grant(): + """'access':'ro' user with READ-only grant cannot call updateGraph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") # READ only, no WRITE + + response = gql(UPDATE_JIRA, headers=ANALYST_HEADERS) + assert response["data"] is None or response["data"].get("updateGraph") is None + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +def test_analyst_can_create_graph_in_namespace(): + """'access':'ro' user with namespace WRITE grant can create a new graph in that namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team/", "WRITE") + + response = gql(CREATE_JIRA_NS, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["newGraph"] is True + + +def test_analyst_cannot_create_graph_outside_namespace(): + """'access':'ro' user with namespace WRITE grant cannot create a graph outside that namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team/", "WRITE") + + response = gql(CREATE_JIRA, headers=ANALYST_HEADERS) # "jira" not under "team/" + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "jira" was not created as a side effect + ns_graphs = gql(QUERY_NS_GRAPHS)["data"]["root"]["graphs"]["list"] + assert "jira" not in [g["path"] for g in ns_graphs] + + +def test_analyst_cannot_call_permissions_mutations(): + """'access':'ro' user with WRITE grant on a graph cannot manage roles/permissions.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") + + response = gql( + 'mutation { permissions { createRole(name: "hacker") { success } } }', + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "hacker" role was not 
created as a side effect + roles = gql("query { permissions { listRoles } }")["data"]["permissions"][ + "listRoles" + ] + assert "hacker" not in roles + + +def test_admin_can_list_roles(): + """'access':'rw' admin can query permissions { listRoles }.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + + response = gql("query { permissions { listRoles } }", headers=ADMIN_HEADERS) + assert "errors" not in response, response + assert "analyst" in response["data"]["permissions"]["listRoles"] + + +def test_analyst_cannot_list_roles(): + """'access':'ro' user cannot query permissions { listRoles }.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + + response = gql("query { permissions { listRoles } }", headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +def test_admin_can_get_role(): + """'access':'rw' admin can query permissions { getRole(...) }.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql( + 'query { permissions { getRole(name: "analyst") { name graphs { path permission } } } }', + headers=ADMIN_HEADERS, + ) + assert "errors" not in response, response + role_data = response["data"]["permissions"]["getRole"] + assert role_data["name"] == "analyst" + assert role_data["graphs"][0]["path"] == "jira" + assert role_data["graphs"][0]["permission"] == "READ" + + +def test_analyst_cannot_get_role(): + """'access':'ro' user cannot query permissions { getRole(...) }.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + + response = gql( + 'query { permissions { getRole(name: "analyst") { name } } }', + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +def test_introspect_only_cannot_access_graph_data(): + """Namespace INTROSPECT is denied by graph() — READ is required to access graph data.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "INTROSPECT") # no READ + + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_no_grant_hidden_from_namespace_and_graph(): + """A role with no namespace INTROSPECT sees graph() as null, not an 'Access denied' error. + + Returning an error would leak that the graph exists. Null is indistinguishable from + 'graph not found'. An error is only appropriate when the role already has INTROSPECT + on the namespace (and therefore can list the graph name anyway). 
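+
+    Concretely: with INTROSPECT on the namespace, an explicit 'Access denied'
+    is acceptable because the listing already names the graph; without it,
+    the same error would reveal that the path exists.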
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + # analyst has no grant at all + + # graph() returns null silently — does not reveal the graph exists + response = gql(QUERY_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + # namespace listing hides it + response = gql(QUERY_NS_GRAPHS, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [g["path"] for g in response["data"]["root"]["graphs"]["list"]] + assert "jira" not in paths + + +def test_grantgraph_introspect_rejected(): + """grantGraph with INTROSPECT permission is rejected — INTROSPECT is namespace-only.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + + response = gql( + 'mutation { permissions { grantGraph(role: "analyst", path: "jira", permission: INTROSPECT) { success } } }' + ) + assert "errors" in response + assert ( + "INTROSPECT cannot be granted on a graph" + in response["errors"][0]["message"] + ) + + +def test_graph_metadata_allowed_with_introspect(): + """graphMetadata is accessible with INTROSPECT permission (namespace grant).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "INTROSPECT") + + response = gql( + 'query { graphMetadata(path: "team/jira") { path nodeCount } }', + headers=ANALYST_HEADERS, + ) + assert "errors" not in response, response + assert response["data"]["graphMetadata"]["path"] == "team/jira" + + # graph() returns null — INTROSPECT does not grant data access + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_graph_metadata_allowed_with_read(): + """graphMetadata is also accessible with READ.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(QUERY_META_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graphMetadata"]["path"] == "jira" + + +def test_graph_metadata_denied_without_grant(): + """graphMetadata is denied when the role has no grant on the graph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + # no grant on jira + + response = gql(QUERY_META_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graphMetadata"] is None + + +def test_analyst_sees_only_filtered_nodes(): + """grantGraphFilteredReadOnly applies a node filter transparently for the role. + + Admin sees all nodes; analyst only sees nodes matching the stored filter. + Calling grantGraph(READ) clears the filter and restores full access. 
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + # Create graph and add nodes with a "region" property + gql(CREATE_JIRA) + for name, region in [ + ("alice", "us-west"), + ("bob", "us-east"), + ("carol", "us-west"), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: 1, + name: "{name}", + properties: [{{ key: "region", value: {{ str: "{region}" }} }}] + ) {{ + success + node {{ + name + }} + }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # Grant filtered read-only: analyst only sees nodes where region = "us-west" + grant_graph_filtered_read_only( + "analyst", + "jira", + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } }', + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + + # Analyst should only see alice and carol (region=us-west) + analyst_response = gql(QUERY_NODES, headers=ANALYST_HEADERS) + assert "errors" not in analyst_response, analyst_response + analyst_names = { + n["name"] for n in analyst_response["data"]["graph"]["nodes"]["list"] + } + assert analyst_names == { + "alice", + "carol", + }, f"expected {{alice, carol}}, got {analyst_names}" + + # Admin should see all three nodes (filter is bypassed for "access":"rw") + admin_response = gql(QUERY_NODES, headers=ADMIN_HEADERS) + assert "errors" not in admin_response, admin_response + admin_names = { + n["name"] for n in admin_response["data"]["graph"]["nodes"]["list"] + } + assert admin_names == { + "alice", + "bob", + "carol", + }, f"expected all 3 nodes, got {admin_names}" + + # Clear the filter by calling grantGraph(READ) — analyst should now see all nodes + grant_graph("analyst", "jira", "READ") + analyst_response_after = gql(QUERY_NODES, headers=ANALYST_HEADERS) + assert "errors" not in analyst_response_after, analyst_response_after + names_after = { + n["name"] for n in analyst_response_after["data"]["graph"]["nodes"]["list"] + } + assert names_after == { + "alice", + "bob", + "carol", + }, f"after plain grant, expected all 3 nodes, got {names_after}" + + +def test_analyst_sees_only_filtered_edges(): + """grantGraphFilteredReadOnly with an edge filter hides edges that don't match. + + Edges with weight >= 5 are visible; edges with weight < 5 are hidden. + Admin bypasses the filter and sees all edges. 
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + # Add three edges: (a->b weight=3), (b->c weight=7), (a->c weight=9) + for src, dst, weight in [("a", "b", 3), ("b", "c", 7), ("a", "c", 9)]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addEdge( + time: 1, + src: "{src}", + dst: "{dst}", + properties: [{{ key: "weight", value: {{ i64: {weight} }} }}] + ) {{ + success + edge {{ + src {{ name }} + dst {{ name }} + }} + }} + }} + }}""") + assert resp["data"]["updateGraph"]["addEdge"]["success"] is True, resp + + create_role("analyst") + # Only show edges where weight >= 5 + grant_graph_filtered_read_only( + "analyst", + "jira", + '{ edge: { property: { name: "weight", where: { ge: { i64: 5 } } } } }', + ) + + QUERY_EDGES = 'query { graph(path: "jira") { edges { list { src { name } dst { name } } } } }' + + analyst_response = gql(QUERY_EDGES, headers=ANALYST_HEADERS) + assert "errors" not in analyst_response, analyst_response + analyst_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in analyst_response["data"]["graph"]["edges"]["list"] + } + assert analyst_edges == { + ("b", "c"), + ("a", "c"), + }, f"expected only heavy edges, got {analyst_edges}" + + # Admin sees all three edges + admin_response = gql(QUERY_EDGES, headers=ADMIN_HEADERS) + assert "errors" not in admin_response, admin_response + admin_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in admin_response["data"]["graph"]["edges"]["list"] + } + assert admin_edges == { + ("a", "b"), + ("b", "c"), + ("a", "c"), + }, f"expected all edges for admin, got {admin_edges}" + + +def test_raphtory_client_analyst_can_query_permitted_graph(): + """RaphtoryClient with analyst role can query a graph it has READ access to.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + result = client.query(QUERY_JIRA) + assert result["graph"]["path"] == "jira" + + +def test_raphtory_client_analyst_denied_unpermitted_graph(): + """RaphtoryClient with analyst role gets null for a graph it has no grant for.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + # No grant on jira — graph returns null (indistinguishable from "graph not found") + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + response = client.query(QUERY_JIRA) + assert response["graph"] is None + + +def test_raphtory_client_analyst_write_with_write_grant(): + """RaphtoryClient with analyst role and WRITE grant can add nodes via remote_graph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "WRITE") + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + client.remote_graph("jira").add_node(1, "client_node") + + client2 = RaphtoryClient(url=RAPHTORY, token=ADMIN_JWT) + received = client2.receive_graph("jira") + assert received.node("client_node") is not None + + +def test_raphtory_client_analyst_write_denied_without_write_grant(): + """RaphtoryClient with analyst role and READ-only grant cannot add nodes via remote_graph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + with pytest.raises(Exception, 
match="Access denied"): + client.remote_graph("jira").add_node(1, "client_node") + + +def test_receive_graph_requires_read(): + """receive_graph (graph download) requires at least READ; namespace INTROSPECT is not enough.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + + # No grant — looks like the graph doesn't exist (no information leakage) + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + with pytest.raises(Exception, match="does not exist"): + client.receive_graph("team/jira") + + # Namespace INTROSPECT only — also denied for receive_graph, but now reveals access denied + grant_namespace("analyst", "team", "INTROSPECT") + with pytest.raises(Exception, match="Access denied"): + client.receive_graph("team/jira") + + # READ — allowed + grant_namespace("analyst", "team", "READ") + g = client.receive_graph("team/jira") + assert g is not None + + +def test_receive_graph_without_introspect_hides_existence(): + """Without namespace INTROSPECT, receive_graph acts as if the graph does not exist. + + This prevents information leakage: a role without any grants cannot distinguish + between 'graph does not exist' and 'graph exists but you are denied'. + """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + + # No grants at all — error must be indistinguishable from a missing graph + with pytest.raises(Exception, match="does not exist") as exc_no_grant: + client.receive_graph("team/jira") + + # Compare with a truly non-existent graph — error should look the same + with pytest.raises(Exception, match="does not exist") as exc_missing: + client.receive_graph("team/nonexistent") + + assert "Access denied" not in str(exc_no_grant.value) + assert "Access denied" not in str(exc_missing.value) + + +def test_receive_graph_with_filtered_access(): + """receive_graph with grantGraphFilteredReadOnly returns a materialized view of the filtered graph. + + The downloaded graph should only contain nodes/edges that pass the stored filter, + not the full unfiltered graph. + """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, region in [ + ("alice", "us-west"), + ("bob", "us-east"), + ("carol", "us-west"), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: 1, + name: "{name}", + properties: [{{ key: "region", value: {{ str: "{region}" }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + grant_graph_filtered_read_only( + "analyst", + "jira", + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } }', + ) + + client = RaphtoryClient(url=RAPHTORY, token=ANALYST_JWT) + received = client.receive_graph("jira") + + names = {n.name for n in received.nodes} + assert names == {"alice", "carol"}, f"Expected only us-west nodes, got: {names}" + assert "bob" not in names + + +def test_analyst_sees_only_graph_filter_window(): + """grantGraphFilteredReadOnly with a graph-level window filter restricts the temporal view. + + Nodes added inside the window [5, 15) are visible; those outside are not. + Admin bypasses the filter and sees all nodes. 
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + # Add nodes at different timestamps: t=1 (outside), t=10 (inside), t=20 (outside) + for name, t in [("early", 1), ("middle", 10), ("late", 20)]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode(time: {t}, name: "{name}") {{ + success + node {{ + name + }} + }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # Window [5, 15) — only "middle" (t=10) falls inside + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ graph: { window: { start: 5, end: 15 } } }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + + analyst_response = gql(QUERY_NODES, headers=ANALYST_HEADERS) + assert "errors" not in analyst_response, analyst_response + analyst_names = { + n["name"] for n in analyst_response["data"]["graph"]["nodes"]["list"] + } + assert analyst_names == { + "middle" + }, f"expected only 'middle' in window, got {analyst_names}" + + # Admin sees all three nodes + admin_response = gql(QUERY_NODES, headers=ADMIN_HEADERS) + assert "errors" not in admin_response, admin_response + admin_names = { + n["name"] for n in admin_response["data"]["graph"]["nodes"]["list"] + } + assert admin_names == { + "early", + "middle", + "late", + }, f"expected all nodes for admin, got {admin_names}" + + +# --- Filter composition (And / Or) tests --- + + +def test_filter_and_node_node(): + """And([node, node]): both node predicates must match (intersection).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, region, role in [ + ("alice", "us-west", "admin"), + ("bob", "us-east", "admin"), + ("carol", "us-west", "user"), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: 1, name: "{name}", + properties: [ + {{ key: "region", value: {{ str: "{region}" }} }}, + {{ key: "role", value: {{ str: "{role}" }} }} + ] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # region=us-west AND role=admin → only alice + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } },' + '{ node: { property: { name: "role", where: { eq: { str: "admin" } } } } }' + "] }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + analyst_names = { + n["name"] + for n in gql(QUERY_NODES, headers=ANALYST_HEADERS)["data"]["graph"][ + "nodes" + ]["list"] + } + assert analyst_names == {"alice"}, f"expected only alice, got {analyst_names}" + + +def test_filter_and_edge_edge(): + """And([edge, edge]): both edge predicates must match (intersection).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for src, dst, weight, kind in [ + ("a", "b", 3, "follows"), + ("b", "c", 7, "mentions"), + ("a", "c", 9, "follows"), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addEdge( + time: 1, src: "{src}", dst: "{dst}", + properties: [ + {{ key: "weight", value: {{ i64: {weight} }} }}, + {{ key: "kind", value: {{ str: "{kind}" }} }} + ] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addEdge"]["success"] is True, resp + + create_role("analyst") + # weight >= 5 AND kind=follows → only (a,c) weight=9 follows + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + '{ edge: { 
property: { name: "weight", where: { ge: { i64: 5 } } } } },' + '{ edge: { property: { name: "kind", where: { eq: { str: "follows" } } } } }' + "] }", + ) + + QUERY_EDGES = 'query { graph(path: "jira") { edges { list { src { name } dst { name } } } } }' + analyst_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in gql(QUERY_EDGES, headers=ANALYST_HEADERS)["data"]["graph"][ + "edges" + ]["list"] + } + assert analyst_edges == { + ("a", "c") + }, f"expected only (a,c), got {analyst_edges}" + + +def test_filter_and_graph_graph(): + """And([graph, graph]): two graph-level views intersect (sequential narrowing).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, t in [("early", 1), ("middle", 10), ("late", 20)]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode(time: {t}, name: "{name}") {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # window [1,15) ∩ window [5,25) → effective [5,15) → only middle (t=10) + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + "{ graph: { window: { start: 1, end: 15 } } }," + "{ graph: { window: { start: 5, end: 25 } } }" + "] }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + analyst_names = { + n["name"] + for n in gql(QUERY_NODES, headers=ANALYST_HEADERS)["data"]["graph"][ + "nodes" + ]["list"] + } + assert analyst_names == {"middle"}, f"expected only middle, got {analyst_names}" + + +def test_filter_and_node_edge(): + """And([node, edge]): node filter applied first restricts nodes (and their edges), then edge filter further restricts.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, region in [ + ("alice", "us-west"), + ("bob", "us-east"), + ("carol", "us-west"), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: 1, name: "{name}", + properties: [{{ key: "region", value: {{ str: "{region}" }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + for src, dst, weight in [ + ("alice", "bob", 3), + ("alice", "carol", 7), + ("bob", "carol", 9), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addEdge( + time: 1, src: "{src}", dst: "{dst}", + properties: [{{ key: "weight", value: {{ i64: {weight} }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addEdge"]["success"] is True, resp + + create_role("analyst") + # Node(us-west) applied first: bob hidden, bob's edges hidden. + # Then Edge(weight≥5): of remaining edges (alice→carol weight=7), only alice→carol passes. 
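+        # (And arms compose sequentially: each arm narrows the view produced
+        # by the previous one, rather than being evaluated independently.)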
+ grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } },' + '{ edge: { property: { name: "weight", where: { ge: { i64: 5 } } } } }' + "] }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + QUERY_EDGES = 'query { graph(path: "jira") { edges { list { src { name } dst { name } } } } }' + + analyst_names = { + n["name"] + for n in gql(QUERY_NODES, headers=ANALYST_HEADERS)["data"]["graph"][ + "nodes" + ]["list"] + } + assert analyst_names == { + "alice", + "carol", + }, f"expected us-west nodes, got {analyst_names}" + + analyst_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in gql(QUERY_EDGES, headers=ANALYST_HEADERS)["data"]["graph"][ + "edges" + ]["list"] + } + # Sequential And: Node(us-west) hides bob and bob's edges, then Edge(weight≥5) keeps alice→carol (7). + assert analyst_edges == { + ("alice", "carol"), + }, f"expected only (alice,carol), got {analyst_edges}" + + +def test_filter_and_node_graph(): + """And([node, graph]): node property filter combined with a graph window.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, region, t in [ + ("alice", "us-west", 1), + ("bob", "us-west", 10), + ("carol", "us-east", 10), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: {t}, name: "{name}", + properties: [{{ key: "region", value: {{ str: "{region}" }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # window [5,15): bob(t=10) + carol(t=10); then node us-west → only bob + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + "{ graph: { window: { start: 5, end: 15 } } }," + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } }' + "] }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + analyst_names = { + n["name"] + for n in gql(QUERY_NODES, headers=ANALYST_HEADERS)["data"]["graph"][ + "nodes" + ]["list"] + } + assert analyst_names == {"bob"}, f"expected only bob, got {analyst_names}" + + +def test_filter_and_edge_graph(): + """And([edge, graph]): edge property filter combined with a graph window.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for src, dst, weight, t in [ + ("a", "b", 3, 1), + ("b", "c", 7, 10), + ("a", "c", 9, 20), + ]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addEdge( + time: {t}, src: "{src}", dst: "{dst}", + properties: [{{ key: "weight", value: {{ i64: {weight} }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addEdge"]["success"] is True, resp + + create_role("analyst") + # window [5,15): b→c(t=10); then edge weight≥5 → b→c(weight=7) passes + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ and: [" + "{ graph: { window: { start: 5, end: 15 } } }," + '{ edge: { property: { name: "weight", where: { ge: { i64: 5 } } } } }' + "] }", + ) + + QUERY_EDGES = 'query { graph(path: "jira") { edges { list { src { name } dst { name } } } } }' + analyst_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in gql(QUERY_EDGES, headers=ANALYST_HEADERS)["data"]["graph"][ + "edges" + ]["list"] + } + assert analyst_edges == { + ("b", "c") + }, f"expected only (b,c), got {analyst_edges}" + + +def test_filter_or_node_node(): + """Or([node, node]): nodes matching either predicate are visible (union).""" 
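+    # Unlike the And tests above, which narrow sequentially, Or keeps any
+    # entity matched by either arm.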
+ work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for name, region in [("alice", "us-west"), ("bob", "us-east"), ("carol", "eu")]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addNode( + time: 1, name: "{name}", + properties: [{{ key: "region", value: {{ str: "{region}" }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addNode"]["success"] is True, resp + + create_role("analyst") + # us-west OR us-east → alice + bob; carol(eu) filtered out + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ or: [" + '{ node: { property: { name: "region", where: { eq: { str: "us-west" } } } } },' + '{ node: { property: { name: "region", where: { eq: { str: "us-east" } } } } }' + "] }", + ) + + QUERY_NODES = 'query { graph(path: "jira") { nodes { list { name } } } }' + analyst_names = { + n["name"] + for n in gql(QUERY_NODES, headers=ANALYST_HEADERS)["data"]["graph"][ + "nodes" + ]["list"] + } + assert analyst_names == { + "alice", + "bob", + }, f"expected alice+bob, got {analyst_names}" + + +def test_filter_or_edge_edge(): + """Or([edge, edge]): edges matching either predicate are visible (union).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + for src, dst, weight in [("a", "b", 3), ("b", "c", 7), ("a", "c", 9)]: + resp = gql(f"""query {{ + updateGraph(path: "jira") {{ + addEdge( + time: 1, src: "{src}", dst: "{dst}", + properties: [{{ key: "weight", value: {{ i64: {weight} }} }}] + ) {{ success }} + }} + }}""") + assert resp["data"]["updateGraph"]["addEdge"]["success"] is True, resp + + create_role("analyst") + # weight=3 OR weight=9 → (a,b) + (a,c); (b,c) weight=7 filtered out + grant_graph_filtered_read_only( + "analyst", + "jira", + "{ or: [" + '{ edge: { property: { name: "weight", where: { eq: { i64: 3 } } } } },' + '{ edge: { property: { name: "weight", where: { eq: { i64: 9 } } } } }' + "] }", + ) + + QUERY_EDGES = 'query { graph(path: "jira") { edges { list { src { name } dst { name } } } } }' + analyst_edges = { + (e["src"]["name"], e["dst"]["name"]) + for e in gql(QUERY_EDGES, headers=ANALYST_HEADERS)["data"]["graph"][ + "edges" + ]["list"] + } + assert analyst_edges == { + ("a", "b"), + ("a", "c"), + }, f"expected (a,b)+(a,c), got {analyst_edges}" + + +# --- Namespace permission tests --- + + +def test_namespace_introspect_shows_graphs_in_listing(): + """grantNamespace INTROSPECT: graphs appear in namespace listing but graph() is denied.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "INTROSPECT") + + # Graphs visible as MetaGraph in namespace listing + response = gql(QUERY_TEAM_GRAPHS, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [g["path"] for g in response["data"]["namespace"]["graphs"]["list"]] + assert "team/jira" in paths + + # Direct graph access returns null — INTROSPECT does not grant data access. 
+ response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + +def test_namespace_read_exposes_graphs(): + """grantNamespace READ: graphs in the namespace are fully accessible via graph().""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "READ") + + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "team/jira" + + +def test_child_namespace_restriction_overrides_parent(): + """More-specific child namespace grant overrides a broader parent grant. + + team → READ (parent) + team/restricted → INTROSPECT (child — more specific, should win) + + Graphs under team/jira are reachable via READ (only parent matches). + Graphs under team/restricted/ are only introspectable — the child INTROSPECT + entry overrides the parent READ, so graph() is denied there. + """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + gql("""mutation { newGraph(path:"team/restricted/secret", graphType:EVENT) }""") + create_role("analyst") + grant_namespace("analyst", "team", "READ") + grant_namespace("analyst", "team/restricted", "INTROSPECT") + + # team/jira: only matched by "team" → READ — direct access allowed + response = gql(QUERY_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["graph"]["path"] == "team/jira" + + # team/restricted/secret: "team/restricted" is the most specific match → INTROSPECT only + response = gql( + """query { graph(path: "team/restricted/secret") { path } }""", + headers=ANALYST_HEADERS, + ) + assert "errors" not in response, response + assert response["data"]["graph"] is None + + # But team/restricted/secret should still appear in the namespace listing + response = gql( + """query { namespace(path: "team/restricted") { graphs { list { path } } } }""", + headers=ANALYST_HEADERS, + ) + assert "errors" not in response, response + paths = [g["path"] for g in response["data"]["namespace"]["graphs"]["list"]] + assert "team/restricted/secret" in paths + + +def test_discover_derivation(): + """grantGraph READ on a namespaced graph → ancestor namespace gets DISCOVER (visible in children).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "READ") # no explicit namespace grant + + # "team" namespace appears in root children due to DISCOVER derivation + response = gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [n["path"] for n in response["data"]["root"]["children"]["list"]] + assert "team" in paths + + +def test_discover_revoked_when_only_child_revoked(): + """Revoking the only child READ grant removes DISCOVER from the parent namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "READ") + + paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "team" in paths # baseline: DISCOVER present + + revoke_graph("analyst", "team/jira") + + paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + 
assert "team" not in paths # DISCOVER gone + + +def test_discover_stays_when_one_of_two_children_revoked(): + """DISCOVER persists while at least one child grant remains; clears only when all are revoked.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + gql(CREATE_TEAM_CONFLUENCE) + create_role("analyst") + grant_graph("analyst", "team/jira", "READ") + grant_graph("analyst", "team/confluence", "READ") + + revoke_graph("analyst", "team/jira") + paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "team" in paths # still visible via team/confluence + + revoke_graph("analyst", "team/confluence") + paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "team" not in paths # now gone + + +def test_discover_stays_when_parent_has_explicit_namespace_read(): + """Revoking a child graph READ does not remove an explicit namespace READ on the parent.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "READ") + grant_namespace("analyst", "team", "READ") # explicit, higher than DISCOVER + + revoke_graph("analyst", "team/jira") + + paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "team" in paths # still visible via explicit namespace READ + + +def test_discover_revoked_for_nested_namespaces(): + """Revoking the only deep grant removes DISCOVER from all ancestor namespaces.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_DEEP) + create_role("analyst") + grant_graph("analyst", "a/b/c", "READ") # "a" and "a/b" both get DISCOVER + + root_paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "a" in root_paths + + a_paths = [ + n["path"] + for n in gql(QUERY_A_CHILDREN, headers=ANALYST_HEADERS)["data"][ + "namespace" + ]["children"]["list"] + ] + assert "a/b" in a_paths + + revoke_graph("analyst", "a/b/c") + + root_paths = [ + n["path"] + for n in gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS)["data"]["root"][ + "children" + ]["list"] + ] + assert "a" not in root_paths + + a_paths = [ + n["path"] + for n in gql(QUERY_A_CHILDREN, headers=ANALYST_HEADERS)["data"][ + "namespace" + ]["children"]["list"] + ] + assert "a/b" not in a_paths + + +def test_no_namespace_grant_hidden_from_children(): + """No grants at all → namespace is hidden from root children listing.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + # analyst has no grants at all + + response = gql(QUERY_NS_CHILDREN, headers=ANALYST_HEADERS) + assert "errors" not in response, response + paths = [n["path"] for n in response["data"]["root"]["children"]["list"]] + assert "team" not in paths + + +# --- deleteGraph / sendGraph policy delegation --- + +DELETE_JIRA = """mutation { deleteGraph(path: "jira") }""" +DELETE_TEAM_JIRA = """mutation { deleteGraph(path: "team/jira") }""" + + +def test_analyst_can_delete_with_graph_and_namespace_write(): + """deleteGraph requires WRITE on both the graph and its parent namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", 
"team/jira", "WRITE") + grant_namespace("analyst", "team", "WRITE") + + response = gql(DELETE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["deleteGraph"] is True + + +def test_analyst_cannot_delete_with_graph_write_only(): + """Graph WRITE alone is insufficient for deleteGraph — namespace WRITE is also required.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "WRITE") + + response = gql(DELETE_JIRA, headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "jira" was not deleted as a side effect + check = gql(QUERY_JIRA) + assert check["data"]["graph"]["path"] == "jira" + + +def test_analyst_cannot_delete_with_read_grant(): + """'access':'ro' user with READ-only grant is denied by deleteGraph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql(DELETE_JIRA, headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "jira" was not deleted as a side effect + check = gql(QUERY_JIRA) + assert check["data"]["graph"]["path"] == "jira" + + +def test_analyst_can_delete_with_namespace_write(): + """'access':'ro' user with namespace WRITE (cascades to graph WRITE) can delete a graph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") + + response = gql(DELETE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["deleteGraph"] is True + + +def test_analyst_cannot_send_graph_without_namespace_write(): + """'access':'ro' user without namespace WRITE is denied by sendGraph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "READ") # READ, not WRITE + + response = gql( + 'mutation { sendGraph(path: "team/new", graph: "dummydata", overwrite: false) }', + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +def test_analyst_send_graph_passes_auth_with_namespace_write(): + """'access':'ro' user with namespace WRITE passes the auth gate in sendGraph. + + The request fails on graph decoding (invalid data), not on access control — + proving the namespace WRITE check is honoured. + """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") + + response = gql( + 'mutation { sendGraph(path: "team/new", graph: "not_valid_base64", overwrite: false) }', + headers=ANALYST_HEADERS, + ) + # Auth passed — error is about graph decoding, not access + assert "errors" in response + assert "Access denied" not in response["errors"][0]["message"] + + +def test_analyst_send_graph_valid_data_with_namespace_write(): + """'access':'ro' user with namespace WRITE can successfully send a valid graph via sendGraph. + + Admin creates a graph and downloads it; analyst with WRITE sends it to a new path. + The graph appears at the new path and its data matches the original. 
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + # Add a node so the graph has content to verify after the roundtrip + gql("""query { + updateGraph(path: "jira") { + addNode(time: 1, name: "alice", properties: []) { success } + } + }""") + + # Admin downloads the graph as valid base64 + encoded = gql('query { receiveGraph(path: "jira") }')["data"]["receiveGraph"] + + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") + + # Analyst sends the encoded graph to a new path + response = gql( + f'mutation {{ sendGraph(path: "team/copy", graph: "{encoded}", overwrite: false) }}', + headers=ANALYST_HEADERS, + ) + assert "errors" not in response, response + assert response["data"]["sendGraph"] == "team/copy" + + # Verify the copy exists and contains the expected node + check = gql('query { graph(path: "team/copy") { nodes { list { name } } } }') + names = [n["name"] for n in check["data"]["graph"]["nodes"]["list"]] + assert "alice" in names + + +# --- moveGraph policy --- + +MOVE_TEAM_JIRA = """mutation { moveGraph(path: "team/jira", newPath: "team/jira-moved", overwrite: false) }""" + + +def test_analyst_can_move_with_graph_write_and_namespace_write(): + """moveGraph requires WRITE on the source graph and its parent namespace, plus WRITE on the destination namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "WRITE") + grant_namespace("analyst", "team", "WRITE") + + response = gql(MOVE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["moveGraph"] is True + + +def test_analyst_cannot_move_with_graph_write_only(): + """Graph WRITE alone is insufficient for moveGraph — namespace WRITE on source is also required.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "WRITE") + # no namespace grant → namespace WRITE check fails + + response = gql(MOVE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "team/jira" still exists and "team/jira-moved" was not created + team_graphs = gql(QUERY_TEAM_GRAPHS)["data"]["namespace"]["graphs"]["list"] + paths = [g["path"] for g in team_graphs] + assert "team/jira" in paths + assert "team/jira-moved" not in paths + + +def test_analyst_cannot_move_with_read_grant(): + """READ on source graph is insufficient for moveGraph — WRITE is required.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_TEAM_JIRA) + create_role("analyst") + grant_graph("analyst", "team/jira", "READ") + grant_namespace("analyst", "team", "WRITE") + + response = gql(MOVE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "team/jira" still exists and "team/jira-moved" was not created + team_graphs = gql(QUERY_TEAM_GRAPHS)["data"]["namespace"]["graphs"]["list"] + paths = [g["path"] for g in team_graphs] + assert "team/jira" in paths + assert "team/jira-moved" not in paths + + +# --- newGraph namespace write enforcement --- + + +def test_analyst_can_create_namespaced_graph_with_namespace_write(): + """'access':'ro' user with namespace WRITE can create a graph inside that namespace.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + 
create_role("analyst") + grant_namespace("analyst", "team", "WRITE") + + response = gql(CREATE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" not in response, response + assert response["data"]["newGraph"] is True + + +def test_analyst_cannot_create_graph_with_namespace_read_only(): + """'access':'ro' user with namespace READ (not WRITE) is denied by newGraph.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "READ") + + response = gql(CREATE_TEAM_JIRA, headers=ANALYST_HEADERS) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + # Verify "team/jira" was not created as a side effect — "team" namespace should be absent + children = gql(QUERY_NS_CHILDREN)["data"]["root"]["children"]["list"] + assert "team" not in [c["path"] for c in children] + + +# --- permissions entry point admin gate --- + + +def test_analyst_cannot_access_permissions_query_entry_point(): + """'access':'ro' user is denied at the permissions query entry point, not just the individual ops. + + This verifies the entry-point-level admin check added to query { permissions { ... } }. + Even with full namespace WRITE, a non-admin JWT cannot reach the permissions resolver. + """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") # full write, still not admin + + response = gql( + "query { permissions { listRoles } }", + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +def test_analyst_cannot_access_permissions_mutation_entry_point(): + """'access':'ro' user is denied at the mutation { permissions { ... } } entry point. + + Even with full namespace WRITE, a non-admin JWT is blocked before reaching any op. 
+ """ + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + create_role("analyst") + grant_namespace("analyst", "team", "WRITE") # full write, still not admin + + response = gql( + 'mutation { permissions { createRole(name: "hacker") { success } } }', + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] + + +# --- createIndex policy --- + + +def test_analyst_can_create_index_with_graph_write(): + """A user with WRITE on a graph can call createIndex (not admin-only).""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "WRITE") + + response = gql( + 'mutation { createIndex(path: "jira", inRam: true) }', + headers=ANALYST_HEADERS, + ) + # Auth passed — success or a feature-not-compiled error, not an access denial + if "errors" in response: + assert "Access denied" not in response["errors"][0]["message"] + + +def test_analyst_cannot_create_index_with_read_grant(): + """READ on a graph is insufficient for createIndex — WRITE is required.""" + work_dir = tempfile.mkdtemp() + with make_server(work_dir).start(): + gql(CREATE_JIRA) + create_role("analyst") + grant_graph("analyst", "jira", "READ") + + response = gql( + 'mutation { createIndex(path: "jira", inRam: true) }', + headers=ANALYST_HEADERS, + ) + assert "errors" in response + assert "Access denied" in response["errors"][0]["message"] diff --git a/python/tests/test_base_install/test_graphql/test_gql_index_spec.py b/python/tests/test_search/test_gql_index_spec.py similarity index 100% rename from python/tests/test_base_install/test_graphql/test_gql_index_spec.py rename to python/tests/test_search/test_gql_index_spec.py diff --git a/python/tests/test_base_install/test_index.py b/python/tests/test_search/test_index.py similarity index 100% rename from python/tests/test_base_install/test_index.py rename to python/tests/test_search/test_index.py diff --git a/python/tests/test_base_install/test_index_spec.py b/python/tests/test_search/test_index_spec.py similarity index 100% rename from python/tests/test_base_install/test_index_spec.py rename to python/tests/test_search/test_index_spec.py diff --git a/python/tests/test_base_install/test_graphql/misc/test_graphql_vectors.py b/python/tests/test_vectors/test_graphql_vectors.py similarity index 60% rename from python/tests/test_base_install/test_graphql/misc/test_graphql_vectors.py rename to python/tests/test_vectors/test_graphql_vectors.py index b0aa5fdd43..6a22c0b35c 100644 --- a/python/tests/test_base_install/test_graphql/misc/test_graphql_vectors.py +++ b/python/tests/test_vectors/test_graphql_vectors.py @@ -94,6 +94,57 @@ def test_upload_graph(): assert_correct_documents(client) +def test_vectorised_graph_window_accepts_time_input_shapes(): + """`VectorisedGraphWindow.{start, end}` accepts every `TimeInput` shape — + Int, RFC3339 string, and `{timestamp, eventId}` object. + + Verifies the schema accepts each form and that all three forms produce + the *same* result for the same time bounds (i.e. 
they're parsed + equivalently).""" + work_dir = tempfile.TemporaryDirectory() + server = GraphServer(work_dir.name) + with embeddings.start(7340): + with server.start(): + client = RaphtoryClient("http://localhost:1736") + client.new_graph("abb", "EVENT") + rg = client.remote_graph("abb") + setup_graph(rg) + # `model` and `apiBase` point at the mock embedding server above, + # so the model name is just a placeholder identifier. + client.query(""" + { + vectoriseGraph(path: "abb", model: { openAI: { model: "mock-model", apiBase: "http://localhost:7340" } }, nodes: { custom: "{{ name }}" }, edges: { enabled: false }) + } + """) + + def run(window_literal: str): + q = """ + { + vectorisedGraph(path: "abb") { + entitiesBySimilarity(query: "aab", limit: 5, window: %s) { + getDocuments { entity { ... on Node { name } } } + } + } + } + """ % window_literal + return client.query(q) + + # Same time bounds, three different input shapes — all should be + # accepted by the schema and produce identical results. + int_form = run("{ start: 0, end: 1000 }") + str_form = run( + '{ start: "1970-01-01T00:00:00.000Z", end: "1970-01-01T00:00:01.000Z" }' + ) + obj_form = run( + "{ start: {timestamp: 0, eventId: 0}, end: {timestamp: 1000, eventId: 0} }" + ) + + assert int_form == str_form == obj_form, ( + "All three TimeInput shapes should produce identical results " + f"for equivalent time bounds.\nint: {int_form}\nstr: {str_form}\nobj: {obj_form}" + ) + + GRAPH_NAME = "abb" diff --git a/python/tests/test_base_install/test_vectors.py b/python/tests/test_vectors/test_vectors.py similarity index 100% rename from python/tests/test_base_install/test_vectors.py rename to python/tests/test_vectors/test_vectors.py diff --git a/python/tox.ini b/python/tox.ini index 8117a83f30..a0b42869dd 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -1,7 +1,7 @@ [tox] requires = tox>=4 -env_list = base, export, all, examples, docs +env_list = base, export, benchmark, examples, docs, auth, timezone # MATURIN_PEP517_ARGS [testenv] @@ -10,21 +10,42 @@ package = wheel wheel_build_env = .pkg extras = tox - all, storage: test + all, storage, auth, timezone, permissions: test export: export - all, storage: all + all: all pass_env = DISK_TEST_MARK [testenv:.pkg] pass_env = MATURIN_PEP517_ARGS +[testenv:search] +wheel_build_env = .pkg_search +commands = pytest {tty:--color=yes} tests/test_search + +[testenv:.pkg_search] +set_env = + MATURIN_PEP517_ARGS="--features=search,extension-module" + + [testenv:export] -commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests/test_export.py +commands = pytest {tty:--color=yes} tests/test_export.py + +[testenv:timezone] +commands = pytest {tty:--color=yes} tests/test_timezone.py [testenv:base] commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests/test_base_install +[testenv:auth] +commands = pytest tests/test_auth.py + +[testenv:permissions] +commands = pytest tests/test_permissions.py + +[testenv:vectors] +commands = pytest tests/test_vectors + [testenv:all] commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests @@ -34,15 +55,8 @@ deps = matplotlib commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} ../examples/python/socio-patterns/example.ipynb -[testenv:storage] -set_env = - DISK_TEST_MARK=1 -wheel_build_env = .pkg_private -commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests - -[testenv:.pkg_private] -set_env = - MATURIN_PEP517_ARGS="--features=storage,extension-module" +[testenv:benchmark] +commands = 
pytest tests/test_graph_benchmarks.py
 
 [testenv:docs]
 deps =
diff --git a/raphtory-api-macros/Cargo.toml b/raphtory-api-macros/Cargo.toml
new file mode 100644
index 0000000000..1d2df24aa0
--- /dev/null
+++ b/raphtory-api-macros/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "raphtory-api-macros"
+version.workspace = true
+documentation.workspace = true
+repository.workspace = true
+license.workspace = true
+readme.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+authors.workspace = true
+rust-version.workspace = true
+edition.workspace = true
+
+[lib]
+proc-macro = true
+
+[dependencies]
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full"] }
diff --git a/raphtory-api-macros/build.rs b/raphtory-api-macros/build.rs
new file mode 100644
index 0000000000..33154a7c92
--- /dev/null
+++ b/raphtory-api-macros/build.rs
@@ -0,0 +1,11 @@
+use std::io::Result;
+fn main() -> Result<()> {
+    println!("cargo::rustc-check-cfg=cfg(has_debug_symbols)");
+
+    if let Ok(profile) = std::env::var("PROFILE") {
+        if profile.contains("debug") {
+            println!("cargo::rustc-cfg=has_debug_symbols");
+        }
+    }
+    Ok(())
+}
diff --git a/raphtory-api-macros/src/lib.rs b/raphtory-api-macros/src/lib.rs
new file mode 100644
index 0000000000..aaa289882f
--- /dev/null
+++ b/raphtory-api-macros/src/lib.rs
@@ -0,0 +1,217 @@
+use proc_macro::TokenStream;
+use proc_macro2::TokenStream as TokenStream2;
+use quote::{quote, ToTokens};
+use syn::{parse_macro_input, Error, ItemFn, Path, Result, ReturnType, Type, TypeParamBound};
+
+/// A specialized procedural macro for functions with complex lifetime parameters.
+/// This macro handles functions that have explicit lifetime parameters and complex bounds.
+///
+/// # Usage
+///
+/// Simply annotate your iterator-returning function with `#[box_on_debug_lifetime]`:
+///
+/// ## Method with complex lifetime bounds:
+/// ```rust
+/// use raphtory_api_macros::box_on_debug_lifetime;
+///
+/// struct Graph;
+/// struct LayerIds;
+/// struct EntryRef<'a>(&'a str);
+///
+/// impl Graph {
+///     #[box_on_debug_lifetime]
+///     fn edge_iter<'a, 'b: 'a>(
+///         &'a self,
+///         layer_ids: &'b LayerIds,
+///     ) -> impl Iterator<Item = EntryRef<'a>> + Send + Sync + 'a {
+///         std::iter::once(EntryRef("test"))
+///     }
+/// }
+///
+/// // Test the method works
+/// let graph = Graph;
+/// let layer_ids = LayerIds;
+/// let entries: Vec<EntryRef<'_>> = graph.edge_iter(&layer_ids).collect();
+/// assert_eq!(entries.len(), 1);
+/// assert_eq!(entries[0].0, "test");
+/// ```
+///
+/// ## Function consuming self with lifetime parameter:
+/// ```rust
+/// use raphtory_api_macros::box_on_debug_lifetime;
+///
+/// struct EdgeStorage;
+/// struct LayerIds;
+/// struct EdgeStorageEntry<'a>(&'a str);
+///
+/// impl EdgeStorage {
+///     #[box_on_debug_lifetime]
+///     pub fn iter<'a>(self, layer_ids: &'a LayerIds) -> impl Iterator<Item = EdgeStorageEntry<'a>> + 'a {
+///         std::iter::once(EdgeStorageEntry("test"))
+///     }
+/// }
+///
+/// // Test the function works
+/// let storage = EdgeStorage;
+/// let layer_ids = LayerIds;
+/// let entries: Vec<EdgeStorageEntry<'_>> = storage.iter(&layer_ids).collect();
+/// assert_eq!(entries.len(), 1);
+/// assert_eq!(entries[0].0, "test");
+/// ```
+///
+/// ## Function with where clause:
+/// ```rust
+/// use raphtory_api_macros::box_on_debug_lifetime;
+///
+/// struct Data<T> {
+///     items: Vec<T>,
+/// }
+///
+/// impl<T> Data<T>
+/// where
+///     T: Clone + Send + Sync,
+/// {
+///     #[box_on_debug_lifetime]
+///     pub fn iter_cloned<'a>(&'a self) -> impl Iterator<Item = T> + 'a
+///     where
+///         T: Clone,
+///     {
+///         self.items.iter().cloned()
+///     }
+/// }
+///
+/// // Test the function works
+/// let data = Data { items: vec![1, 2, 3, 4, 5] };
+/// let cloned: Vec<i32> = data.iter_cloned().collect();
+/// assert_eq!(cloned, vec![1, 2, 3, 4, 5]);
+/// ```
+///
+#[proc_macro_attribute]
+pub fn box_on_debug_lifetime(_attr: TokenStream, item: TokenStream) -> TokenStream {
+    let input_fn = parse_macro_input!(item as ItemFn);
+
+    match generate_box_on_debug_lifetime_impl(&input_fn) {
+        Ok(output) => output.into(),
+        Err(err) => err.to_compile_error().into(),
+    }
+}
+
+fn generate_box_on_debug_lifetime_impl(input_fn: &ItemFn) -> Result<TokenStream2> {
+    let attrs = &input_fn.attrs;
+    let vis = &input_fn.vis;
+    let sig = &input_fn.sig;
+    let block = &input_fn.block;
+    let fn_name = &sig.ident;
+
+    // Parse the return type to extract iterator information
+    let (item_type, bounds) = parse_iterator_return_type(&sig.output)?;
+
+    // For lifetime version, we preserve all bounds including lifetimes
+    let debug_return_type = generate_boxed_return_type_with_lifetimes(&item_type, &bounds);
+
+    // Generate the release version (original)
+    let release_return_type = &sig.output;
+
+    let generics = &sig.generics;
+    let inputs = &sig.inputs;
+    let where_clause = &sig.generics.where_clause;
+
+    Ok(quote! {
+        #[cfg(has_debug_symbols)]
+        #(#attrs)*
+        #vis fn #fn_name #generics(#inputs) #debug_return_type #where_clause {
+            let iter = #block;
+            Box::new(iter)
+        }
+
+        #[cfg(not(has_debug_symbols))]
+        #(#attrs)*
+        #vis fn #fn_name #generics(#inputs) #release_return_type #where_clause {
+            #block
+        }
+    })
+}
+
+fn parse_iterator_return_type(
+    return_type: &ReturnType,
+) -> Result<(TokenStream2, Vec<TokenStream2>)> {
+    match return_type {
+        ReturnType::Type(_, ty) => {
+            if let Type::ImplTrait(impl_trait) = ty.as_ref() {
+                let mut item_type = None;
+                let mut bounds = Vec::new();
+
+                for bound in &impl_trait.bounds {
+                    match bound {
+                        TypeParamBound::Trait(trait_bound) => {
+                            let path = &trait_bound.path;
+
+                            // Check if this is an Iterator trait
+                            if is_iterator_trait(path) {
+                                // Extract the Item type from Iterator
+                                if let Some(seg) = path.segments.last() {
+                                    if let syn::PathArguments::AngleBracketed(args) = &seg.arguments
+                                    {
+                                        for arg in &args.args {
+                                            if let syn::GenericArgument::AssocType(binding) = arg {
+                                                if binding.ident == "Item" {
+                                                    item_type = Some(binding.ty.to_token_stream());
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            } else {
+                                // This is another bound like Send, Sync, or lifetime
+                                bounds.push(bound.to_token_stream());
+                            }
+                        }
+                        TypeParamBound::Lifetime(_) => {
+                            bounds.push(bound.to_token_stream());
+                        }
+                        _ => {
+                            // Handle any other bounds (e.g. Verbatim)
+                            bounds.push(bound.to_token_stream());
+                        }
+                    }
+                }
+
+                if let Some(item) = item_type {
+                    Ok((item, bounds))
+                } else {
+                    Err(Error::new_spanned(
+                        return_type,
+                        "Expected Iterator in return type",
+                    ))
+                }
+            } else {
+                Err(Error::new_spanned(
+                    return_type,
+                    "Expected impl Iterator<...> return type",
+                ))
+            }
+        }
+        _ => Err(Error::new_spanned(
+            return_type,
+            "Expected -> impl Iterator<...> return type",
+        )),
+    }
+}
+
+fn is_iterator_trait(path: &Path) -> bool {
+    path.segments
+        .last()
+        .map(|seg| seg.ident == "Iterator")
+        .unwrap_or(false)
+}
+
+fn generate_boxed_return_type_with_lifetimes(
+    item_type: &TokenStream2,
+    bounds: &[TokenStream2],
+) -> TokenStream2 {
+    if bounds.is_empty() {
+        quote! { -> Box<dyn Iterator<Item = #item_type>> }
+    } else {
+        quote! { -> Box<dyn Iterator<Item = #item_type> + #(#bounds)+*> }
+    }
+}
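For orientation, here is a hand-expanded sketch of the two items the attribute emits, following `generate_box_on_debug_lifetime_impl` above. The function `numbers` is a made-up stand-in, not part of the crate:

// Debug builds (build.rs sets `has_debug_symbols` when PROFILE contains
// "debug"): the body is wrapped and boxed so debuggers see a concrete
// `Box<dyn Iterator>` vtable instead of a deeply nested opaque type.
#[cfg(has_debug_symbols)]
fn numbers<'a>() -> Box<dyn Iterator<Item = u32> + Send + 'a> {
    let iter = { (0..3u32).map(|x| x * 2) };
    Box::new(iter)
}

// Release builds keep the original zero-cost `impl Trait` signature.
#[cfg(not(has_debug_symbols))]
fn numbers<'a>() -> impl Iterator<Item = u32> + Send + 'a {
    (0..3u32).map(|x| x * 2)
}

fn main() {
    assert_eq!(numbers().collect::<Vec<_>>(), vec![0, 2, 4]);
}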
diff --git a/raphtory-api-macros/tests/integration_test.rs b/raphtory-api-macros/tests/integration_test.rs
new file mode 100644
index 0000000000..3aaa79cb7c
--- /dev/null
+++ b/raphtory-api-macros/tests/integration_test.rs
@@ -0,0 +1,74 @@
+use raphtory_api_macros::box_on_debug_lifetime;
+
+struct LayerIds;
+struct Direction;
+struct EdgeRef;
+
+struct TestStruct;
+
+impl TestStruct {
+    #[box_on_debug_lifetime]
+    fn edge_iter<'a, 'b: 'a>(
+        &'a self,
+        _layer_ids: &'b LayerIds,
+    ) -> impl Iterator<Item = EdgeRef> + Send + Sync + 'a {
+        // Simplified version of your complex matching logic
+        std::iter::empty()
+    }
+}
+
+trait TestTrait<'a> {
+    type EntryRef;
+
+    fn edges_iter<'b>(
+        self,
+        layers_ids: &'b LayerIds,
+        dir: Direction,
+    ) -> impl Iterator<Item = Self::EntryRef> + Send + Sync + 'a
+    where
+        Self: Sized;
+}
+
+impl<'a> TestTrait<'a> for &'a TestStruct {
+    type EntryRef = EdgeRef;
+
+    #[box_on_debug_lifetime]
+    fn edges_iter<'b>(
+        self,
+        _layers_ids: &'b LayerIds,
+        _dir: Direction,
+    ) -> impl Iterator<Item = Self::EntryRef> + Send + Sync + 'a
+    where
+        Self: Sized,
+    {
+        std::iter::empty()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn can_send_and_sync<T: Send + Sync>(_t: &T) {}
+
+    #[test]
+    fn test_edge_iter() {
+        let test_struct = TestStruct;
+        let layer_ids = LayerIds;
+        let iter = test_struct.edge_iter(&layer_ids);
+        can_send_and_sync(&iter);
+        let collected: Vec<EdgeRef> = iter.collect();
+        assert_eq!(collected.len(), 0);
+    }
+
+    #[test]
+    fn test_edges_iter() {
+        let test_struct = TestStruct;
+        let layer_ids = LayerIds;
+        let direction = Direction;
+        let iter = (&test_struct).edges_iter(&layer_ids, direction);
+        can_send_and_sync(&iter);
+        let collected: Vec<EdgeRef> = iter.collect();
+        assert_eq!(collected.len(), 0);
+    }
+}
diff --git a/raphtory-api-macros/tests/macro_expansion_test.rs b/raphtory-api-macros/tests/macro_expansion_test.rs
new file mode 100644
index 0000000000..e981dc0f71
--- /dev/null
+++ b/raphtory-api-macros/tests/macro_expansion_test.rs
@@ -0,0 +1,26 @@
+use raphtory_api_macros::box_on_debug_lifetime;
+
+struct TestItem;
+
+#[box_on_debug_lifetime]
+fn test_function<'a>() -> impl Iterator<Item = TestItem> + Send + Sync + 'a {
+    std::iter::empty()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_debug_vs_release_types() {
+        let iter = test_function();
+        let _collected: Vec<TestItem> = iter.collect();
+    }
+
+    #[test]
+    #[cfg(debug_assertions)]
+    fn test_debug_build_returns_box() {
+        let iter = test_function();
+        let _boxed: Box<dyn Iterator<Item = TestItem> + Send + Sync> = iter;
+    }
+}
{ workspace = true } +arrow-ipc = { workspace = true } +arrow-schema = { workspace = true } +serde_arrow = { workspace = true } itertools = { workspace = true } iter-enum = { workspace = true } minijinja = { workspace = true, optional = true } @@ -45,21 +48,16 @@ display-error-chain = { workspace = true, optional = true } [dev-dependencies] proptest.workspace = true +serde_json.workspace = true [features] -default = [] # Enables generating the pyo3 python bindings python = [ - "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain", "dep:arrow-schema" -] - -storage = [ - "dep:arrow-schema", + "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain" ] proto = [] vectors = [] template = ["dep:minijinja"] -arrow = ["dep:arrow-array", "dep:arrow-ipc", "dep:arrow-schema"] search = [] io = ["dep:serde_json"] diff --git a/raphtory-api/src/compute.rs b/raphtory-api/src/compute.rs index 406ff281fd..2aab8b9842 100644 --- a/raphtory-api/src/compute.rs +++ b/raphtory-api/src/compute.rs @@ -1,5 +1,13 @@ use rayon::prelude::*; /// Compute cumulative sum in parallel over `num_chunks` chunks +pub fn cum_sum(values: &mut [usize]) { + let mut sum = 0; + for v in values { + sum += *v; + *v = sum; + } +} + pub fn par_cum_sum(values: &mut [usize]) { let num_chunks = rayon::current_num_threads(); let chunk_size = values.len().div_ceil(num_chunks); @@ -28,12 +36,12 @@ pub fn par_cum_sum(values: &mut [usize]) { #[cfg(test)] mod test { - use super::par_cum_sum; + use super::cum_sum; #[test] fn test_cum_sum() { let mut values: Vec<_> = (0..100).collect(); - par_cum_sum(&mut values); + cum_sum(&mut values); let mut cum_sum = 0; for (index, v) in values.into_iter().enumerate() { cum_sum += index; diff --git a/raphtory-api/src/core/entities/edges/edge_ref.rs b/raphtory-api/src/core/entities/edges/edge_ref.rs index ceafb31a40..fa64271bfa 100644 --- a/raphtory-api/src/core/entities/edges/edge_ref.rs +++ b/raphtory-api/src/core/entities/edges/edge_ref.rs @@ -1,5 +1,5 @@ use crate::core::{ - entities::{EID, VID}, + entities::{LayerId, EID, VID}, storage::timeindex::{AsTime, EventTime}, }; use std::cmp::Ordering; @@ -11,7 +11,7 @@ pub struct EdgeRef { dst_pid: VID, e_type: Dir, time: Option, - layer_id: Option, + layer_id: Option, } // This is used for merging iterators of EdgeRefs and only makes sense if the local node for both @@ -66,7 +66,7 @@ impl EdgeRef { } #[inline(always)] - pub fn layer(&self) -> Option { + pub fn layer(&self) -> Option { self.layer_id } @@ -124,7 +124,7 @@ impl EdgeRef { } #[inline] - pub fn at_layer(&self, layer: usize) -> Self { + pub fn at_layer(&self, layer: LayerId) -> Self { let mut e_ref = *self; e_ref.layer_id = Some(layer); e_ref diff --git a/raphtory-api/src/core/entities/layers.rs b/raphtory-api/src/core/entities/layers.rs index 1e5cc31650..74ee307452 100644 --- a/raphtory-api/src/core/entities/layers.rs +++ b/raphtory-api/src/core/entities/layers.rs @@ -1,10 +1,39 @@ use crate::core::storage::arc_str::ArcStr; +use bytemuck::{Pod, Zeroable}; use iter_enum::{ DoubleEndedIterator, ExactSizeIterator, FusedIterator, IndexedParallelIterator, Iterator, ParallelExtend, ParallelIterator, }; use rayon::prelude::*; -use std::{iter::Copied, sync::Arc}; +use serde::{Deserialize, Serialize}; +use std::{ + fmt::{Display, Formatter, Result as FmtResult}, + iter::Copied, + sync::Arc, +}; + +#[derive( + Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize, Pod, Zeroable, +)] +#[repr(transparent)] +pub struct LayerId(pub usize); +impl PartialEq for LayerId { + fn eq(&self, 
diff --git a/raphtory-api/src/core/entities/edges/edge_ref.rs b/raphtory-api/src/core/entities/edges/edge_ref.rs
index ceafb31a40..fa64271bfa 100644
--- a/raphtory-api/src/core/entities/edges/edge_ref.rs
+++ b/raphtory-api/src/core/entities/edges/edge_ref.rs
@@ -1,5 +1,5 @@
 use crate::core::{
-    entities::{EID, VID},
+    entities::{LayerId, EID, VID},
     storage::timeindex::{AsTime, EventTime},
 };
 use std::cmp::Ordering;
@@ -11,7 +11,7 @@ pub struct EdgeRef {
     dst_pid: VID,
     e_type: Dir,
     time: Option<EventTime>,
-    layer_id: Option<usize>,
+    layer_id: Option<LayerId>,
 }
 
 // This is used for merging iterators of EdgeRefs and only makes sense if the local node for both
@@ -66,7 +66,7 @@ impl EdgeRef {
     }
 
     #[inline(always)]
-    pub fn layer(&self) -> Option<usize> {
+    pub fn layer(&self) -> Option<LayerId> {
         self.layer_id
     }
 
@@ -124,7 +124,7 @@ impl EdgeRef {
     }
 
     #[inline]
-    pub fn at_layer(&self, layer: usize) -> Self {
+    pub fn at_layer(&self, layer: LayerId) -> Self {
         let mut e_ref = *self;
         e_ref.layer_id = Some(layer);
         e_ref
diff --git a/raphtory-api/src/core/entities/layers.rs b/raphtory-api/src/core/entities/layers.rs
index 1e5cc31650..74ee307452 100644
--- a/raphtory-api/src/core/entities/layers.rs
+++ b/raphtory-api/src/core/entities/layers.rs
@@ -1,10 +1,39 @@
 use crate::core::storage::arc_str::ArcStr;
+use bytemuck::{Pod, Zeroable};
 use iter_enum::{
     DoubleEndedIterator, ExactSizeIterator, FusedIterator, IndexedParallelIterator, Iterator,
     ParallelExtend, ParallelIterator,
 };
 use rayon::prelude::*;
-use std::{iter::Copied, sync::Arc};
+use serde::{Deserialize, Serialize};
+use std::{
+    fmt::{Display, Formatter, Result as FmtResult},
+    iter::Copied,
+    sync::Arc,
+};
+
+#[derive(
+    Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize, Pod, Zeroable,
+)]
+#[repr(transparent)]
+pub struct LayerId(pub usize);
+
+impl PartialEq<usize> for LayerId {
+    fn eq(&self, other: &usize) -> bool {
+        self.0 == *other
+    }
+}
+
+impl PartialEq<LayerId> for usize {
+    fn eq(&self, other: &LayerId) -> bool {
+        *self == other.0
+    }
+}
+
+impl Display for LayerId {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        Display::fmt(&self.0, f)
+    }
+}
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Layer {
@@ -25,6 +54,17 @@ impl Layer {
             Layer::Multiple(layers) => layers.iter().any(|l| l == name),
         }
     }
+
+    fn from_iter<T: SingleLayer, I: IntoIterator<Item = T>>(names: I) -> Self
+    where
+        I::IntoIter: ExactSizeIterator,
+    {
+        let mut names = names.into_iter();
+        match names.len() {
+            0 => Layer::None,
+            1 => Layer::One(names.next().unwrap().name()),
+            _ => Layer::Multiple(names.map(|s| s.name()).collect::<Vec<_>>().into()),
+        }
+    }
 }
 
 pub trait SingleLayer {
@@ -77,33 +117,31 @@ impl<T: SingleLayer> SingleLayer for Option<T> {
 
 impl<T: SingleLayer> From<Vec<T>> for Layer {
     fn from(names: Vec<T>) -> Self {
-        match names.len() {
-            0 => Layer::None,
-            1 => Layer::One(names.into_iter().next().unwrap().name()),
-            _ => Layer::Multiple(
-                names
-                    .into_iter()
-                    .map(|s| s.name())
-                    .collect::<Vec<_>>()
-                    .into(),
-            ),
-        }
+        Self::from_iter(names)
     }
 }
 
 impl<T: SingleLayer, const N: usize> From<[T; N]> for Layer {
     fn from(names: [T; N]) -> Self {
-        match N {
-            0 => Layer::None,
-            1 => Layer::One(names.into_iter().next().unwrap().name()),
-            _ => Layer::Multiple(
-                names
-                    .into_iter()
-                    .map(|s| s.name())
-                    .collect::<Vec<_>>()
-                    .into(),
-            ),
-        }
+        Self::from_iter(names)
+    }
+}
+
+impl<'a, T: 'a> From<&'a [T]> for Layer
+where
+    &'a T: SingleLayer,
+{
+    fn from(names: &'a [T]) -> Self {
+        Self::from_iter(names)
+    }
+}
+
+impl<'a, T: 'a> From<&'a Vec<T>> for Layer
+where
+    &'a T: SingleLayer,
+{
+    fn from(names: &'a Vec<T>) -> Self {
+        Self::from_iter(names)
    }
 }
 
@@ -111,7 +149,7 @@ impl<T: SingleLayer, const N: usize> From<[T; N]> for Layer {
 pub enum LayerIds {
     None,
     All,
-    One(usize),
+    One(LayerId),
     Multiple(Multiple),
 }
@@ -132,11 +170,11 @@ pub enum LayerVariants {
 }
 
 #[derive(Clone, Debug, Default)]
-pub struct Multiple(pub Arc<[usize]>);
+pub struct Multiple(pub Arc<[LayerId]>);
 
 impl<'a> IntoIterator for &'a Multiple {
-    type Item = usize;
-    type IntoIter = Copied<std::slice::Iter<'a, usize>>;
+    type Item = LayerId;
+    type IntoIter = Copied<std::slice::Iter<'a, LayerId>>;
 
     fn into_iter(self) -> Self::IntoIter {
         self.0.iter().copied()
@@ -145,33 +183,33 @@ impl<'a> IntoIterator for &'a Multiple {
 
 impl Multiple {
     #[inline]
-    pub fn contains(&self, id: usize) -> bool {
+    pub fn contains(&self, id: LayerId) -> bool {
         self.0.binary_search(&id).is_ok()
     }
 
     #[inline]
-    pub fn into_iter(&self) -> impl Iterator<Item = usize> {
+    pub fn into_iter(self) -> impl Iterator<Item = LayerId> {
         let ids = self.0.clone();
         (0..ids.len()).map(move |i| ids[i])
     }
 
     #[inline]
-    pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
+    pub fn iter(&self) -> impl Iterator<Item = LayerId> + '_ {
         self.0.iter().copied()
     }
 
     #[inline]
-    pub fn get_id_by_index(&self, index: usize) -> Option<usize> {
+    pub fn get_id_by_index(&self, index: usize) -> Option<LayerId> {
         self.0.get(index).copied()
     }
 
     #[inline]
-    pub fn get_index_by_id(&self, id: usize) -> Option<usize> {
+    pub fn get_index_by_id(&self, id: LayerId) -> Option<usize> {
         self.0.binary_search(&id).ok()
     }
 
     #[inline]
-    pub fn par_iter(&self) -> impl rayon::iter::ParallelIterator<Item = usize> {
+    pub fn par_iter(&self) -> impl rayon::iter::ParallelIterator<Item = LayerId> {
         let bit_vec = self.0.clone();
         (0..bit_vec.len()).into_par_iter().map(move |i| bit_vec[i])
     }
@@ -189,6 +227,15 @@ impl Multiple {
 
 impl FromIterator<usize> for Multiple {
     fn from_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
+        let mut inner: Vec<_> = iter.into_iter().map(LayerId).collect();
+        inner.sort();
+        inner.dedup();
+        Multiple(inner.into())
+    }
+}
+
+impl FromIterator<LayerId> for Multiple {
+    fn from_iter<I: IntoIterator<Item = LayerId>>(iter: I) -> Self {
         let mut inner: Vec<_> = iter.into_iter().collect();
         inner.sort();
         inner.dedup();
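The `FromIterator` impls above sort and dedup ids before freezing them into the `Arc` slice; that invariant is what makes the `binary_search`-based `contains` correct. A simplified standalone sketch (plain `usize` ids in place of `LayerId`):

use std::sync::Arc;

#[derive(Clone, Debug, Default)]
struct Multiple(Arc<[usize]>);

impl FromIterator<usize> for Multiple {
    fn from_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
        let mut inner: Vec<_> = iter.into_iter().collect();
        inner.sort(); // binary_search requires sorted input
        inner.dedup(); // and each id stored once
        Multiple(inner.into())
    }
}

impl Multiple {
    fn contains(&self, id: usize) -> bool {
        self.0.binary_search(&id).is_ok()
    }
}

fn main() {
    let m: Multiple = [3, 1, 3, 2].into_iter().collect();
    assert!(m.contains(2) && !m.contains(4));
}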
@@ -196,8 +243,17 @@ impl FromIterator<usize> for Multiple {
     }
 }
 
+impl From<Vec<LayerId>> for Multiple {
+    fn from(mut v: Vec<LayerId>) -> Self {
+        v.sort();
+        v.dedup();
+        Multiple(v.into())
+    }
+}
+
 impl From<Vec<usize>> for Multiple {
-    fn from(mut v: Vec<usize>) -> Self {
+    fn from(v: Vec<usize>) -> Self {
+        let mut v: Vec<_> = v.into_iter().map(LayerId).collect();
         v.sort();
         v.dedup();
         Multiple(v.into())
diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs
index cec2a58fbf..3225e71f36 100644
--- a/raphtory-api/src/core/entities/mod.rs
+++ b/raphtory-api/src/core/entities/mod.rs
@@ -1,7 +1,6 @@
 use super::input::input_node::parse_u64_strict;
 use crate::iter::IntoDynBoxed;
 use bytemuck::{Pod, Zeroable};
-use edges::edge_ref::EdgeRef;
 use num_traits::ToPrimitive;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -14,6 +13,7 @@ pub mod edges;
 pub mod layers;
 pub mod properties;
 
+use crate::core::entities::properties::prop::PropType;
 pub use layers::*;
 
 // The only reason this is public is because the physical IDs of the nodes don’t move.
@@ -37,6 +37,11 @@ impl VID {
     pub fn as_u64(&self) -> u64 {
         self.0 as u64
     }
+
+    /// check if the VID points to a node
+    pub fn is_initialised(&self) -> bool {
+        self.0 != usize::MAX
+    }
 }
 
 impl From<usize> for VID {
@@ -64,15 +69,19 @@ impl Default for EID {
 }
 
 impl EID {
+    pub fn index(&self) -> usize {
+        self.0
+    }
+
     pub fn as_u64(self) -> u64 {
         self.0 as u64
     }
 
-    pub fn with_layer(self, layer: usize) -> ELID {
+    pub fn with_layer(self, layer: LayerId) -> ELID {
         ELID::new(self, layer)
     }
 
-    pub fn with_layer_deletion(self, layer: usize) -> ELID {
+    pub fn with_layer_deletion(self, layer: LayerId) -> ELID {
         ELID::new_deletion(self, layer)
     }
 }
@@ -95,6 +104,12 @@ impl EID {
     }
 }
 
+impl From<ELID> for EID {
+    fn from(elid: ELID) -> Self {
+        elid.edge
+    }
+}
+
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize)]
 pub struct ELID {
     pub edge: EID,
@@ -115,22 +130,22 @@ const LAYER_FLAG: usize = 1usize.reverse_bits();
 pub const MAX_LAYER: usize = usize::MAX & !LAYER_FLAG;
 
 impl ELID {
-    pub fn new(edge: EID, layer: usize) -> Self {
+    pub fn new(edge: EID, layer: LayerId) -> Self {
         ELID {
             edge,
-            layer_and_deletion: layer,
+            layer_and_deletion: layer.0,
         }
     }
 
-    pub fn new_deletion(edge: EID, layer: usize) -> Self {
+    pub fn new_deletion(edge: EID, layer: LayerId) -> Self {
         ELID {
             edge,
-            layer_and_deletion: layer | LAYER_FLAG,
+            layer_and_deletion: layer.0 | LAYER_FLAG,
         }
     }
 
-    pub fn layer(&self) -> usize {
-        self.layer_and_deletion & !LAYER_FLAG
+    pub fn layer(&self) -> LayerId {
+        LayerId(self.layer_and_deletion & !LAYER_FLAG)
     }
 
     pub fn is_deletion(&self) -> bool {
@@ -226,7 +241,7 @@ impl GID {
         }
     }
 
-    pub fn to_str(&'_ self) -> Cow<'_, str> {
+    pub fn to_str(&self) -> Cow<'_, str> {
         match self {
             GID::U64(v) => Cow::Owned(v.to_string()),
             GID::Str(v) => Cow::Borrowed(v),
@@ -294,6 +309,40 @@ pub enum GidRef<'a> {
     Str(&'a str),
 }
 
+#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
+pub enum GidCow<'a> {
+    U64(u64),
+    Str(Cow<'a, str>),
+}
+
+impl<'a> From<GidRef<'a>> for GidCow<'a> {
+    fn from(value: GidRef<'a>) -> Self {
+        match value {
+            GidRef::U64(v) => Self::U64(v),
+            GidRef::Str(v) => Self::Str(Cow::Borrowed(v)),
+        }
+    }
+}
+
+impl<'a> GidCow<'a> {
+    pub fn as_ref<'b>(&'b self) -> GidRef<'b>
+    where
+        'a: 'b,
+    {
+        match self {
+            GidCow::U64(v) => GidRef::U64(*v),
+            GidCow::Str(v) => GidRef::Str(v),
+        }
+    }
+
+    pub fn into_owned(self) -> GID {
+        match self {
+            GidCow::U64(v) => GID::U64(v),
+            GidCow::Str(v) => GID::Str(v.into_owned()),
+        }
+    }
+}
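A small usage sketch of the `GidCow` type added above, using simplified standalone copies (the real `GID` stores its own string type); the point is deferring allocation until ownership is actually needed:

use std::borrow::Cow;

#[derive(Debug, PartialEq)]
enum GID {
    U64(u64),
    Str(String),
}

enum GidCow<'a> {
    U64(u64),
    Str(Cow<'a, str>),
}

impl GidCow<'_> {
    fn into_owned(self) -> GID {
        match self {
            GidCow::U64(v) => GID::U64(v),
            GidCow::Str(v) => GID::Str(v.into_owned()),
        }
    }
}

fn main() {
    let name = String::from("alice");
    let cow = GidCow::Str(Cow::Borrowed(&name)); // borrows, no allocation
    assert_eq!(cow.into_owned(), GID::Str("alice".into())); // clones here
}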
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum GidType {
     U64,
@@ -313,11 +362,21 @@ impl Display for GidType {
     }
 }
 
+impl GidType {
+    pub fn from_prop_type(prop_type: &PropType) -> Option<GidType> {
+        match prop_type {
+            PropType::Str => Some(GidType::Str),
+            PropType::U64 | PropType::U32 | PropType::I64 | PropType::I32 => Some(GidType::U64),
+            _ => None,
+        }
+    }
+}
+
 impl Display for GidRef<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
-            GidRef::U64(v) => write!(f, "{}", v),
-            GidRef::Str(v) => write!(f, "{}", v),
+            GidRef::U64(v) => write!(f, "{v}"),
+            GidRef::Str(v) => write!(f, "{v}"),
         }
     }
 }
@@ -337,6 +396,12 @@ impl<'a> From<&'a str> for GidRef<'a> {
     }
 }
 
+impl From<u64> for GidRef<'_> {
+    fn from(value: u64) -> Self {
+        GidRef::U64(value)
+    }
+}
+
 impl<'a> GidRef<'a> {
     pub fn dtype(self) -> GidType {
         match self {
@@ -389,13 +454,13 @@ impl<'a> GidRef<'a> {
 
 #[cfg(test)]
 mod test {
-    use crate::core::entities::Multiple;
+    use crate::core::entities::{LayerId, Multiple};
 
     #[test]
     fn empty_bit_multiple() {
         let bm = super::Multiple::default();
         let actual = bm.into_iter().collect::<Vec<_>>();
-        let expected: Vec<usize> = vec![];
+        let expected: Vec<LayerId> = vec![];
         assert_eq!(actual, expected);
     }
 
@@ -416,21 +481,6 @@ mod test {
 }
 
 impl LayerIds {
-    pub fn find(&self, layer_id: usize) -> Option<usize> {
-        match self {
-            LayerIds::All => Some(layer_id),
-            LayerIds::One(id) => {
-                if *id == layer_id {
-                    Some(layer_id)
-                } else {
-                    None
-                }
-            }
-            LayerIds::Multiple(ids) => ids.contains(layer_id).then_some(layer_id),
-            LayerIds::None => None,
-        }
-    }
-
     pub fn intersect(&self, other: &LayerIds) -> LayerIds {
         match (self, other) {
             (LayerIds::None, _) => LayerIds::None,
@@ -445,7 +495,7 @@ impl LayerIds {
                 }
             }
             (LayerIds::Multiple(ids), other) => {
-                let ids: Vec<usize> = ids.iter().filter(|id| other.contains(id)).collect();
+                let ids: Vec<_> = ids.iter().filter(|id| other.contains(id)).collect();
                 match ids.len() {
                     0 => LayerIds::None,
                     1 => LayerIds::One(ids[0]),
@@ -455,20 +505,15 @@ impl LayerIds {
         }
     }
 
-    pub fn constrain_from_edge(&self, e: EdgeRef) -> Cow<'_, LayerIds> {
-        match e.layer() {
-            None => Cow::Borrowed(self),
-            Some(l) => self
-                .find(l)
-                .map(|id| Cow::Owned(LayerIds::One(id)))
-                .unwrap_or(Cow::Owned(LayerIds::None)),
+    pub fn contains(&self, layer_id: &LayerId) -> bool {
+        match self {
+            LayerIds::All => true,
+            LayerIds::One(id) => id == layer_id,
+            LayerIds::Multiple(ids) => ids.contains(*layer_id),
+            LayerIds::None => false,
         }
     }
 
-    pub fn contains(&self, layer_id: &usize) -> bool {
-        self.find(*layer_id).is_some()
-    }
-
     pub fn is_none(&self) -> bool {
         matches!(self, LayerIds::None)
     }
@@ -477,18 +522,41 @@ impl LayerIds {
         matches!(self, LayerIds::One(_))
     }
 
-    pub fn iter(&self, num_layers: usize) -> impl Iterator<Item = usize> {
+    pub fn is_all(&self) -> bool {
+        matches!(self, LayerIds::All)
+    }
+
+    pub fn iter(&self, num_layers: usize) -> impl Iterator<Item = LayerId> + use<'_> {
         match self {
             LayerIds::None => iter::empty().into_dyn_boxed(),
-            LayerIds::All => (0..num_layers).into_dyn_boxed(),
+            LayerIds::All => (0..num_layers).map(LayerId).into_dyn_boxed(),
             LayerIds::One(id) => iter::once(*id).into_dyn_boxed(),
             LayerIds::Multiple(ids) => ids.into_iter().into_dyn_boxed(),
         }
     }
+
+    pub fn into_iter(self, num_layers: usize) -> impl Iterator<Item = LayerId> {
+        match self {
+            LayerIds::None => iter::empty().into_dyn_boxed(),
+            LayerIds::All => (0..num_layers).map(LayerId).into_dyn_boxed(),
+            LayerIds::One(id) => iter::once(id).into_dyn_boxed(),
+            LayerIds::Multiple(ids) => ids.into_iter().into_dyn_boxed(),
+        }
+    }
 }
 
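The `iter` signature above uses `use<'_>`, Rust's precise-capturing syntax (stable since 1.82): the opaque return type is declared to capture only the elided `&self` lifetime and nothing else. A minimal illustration of the same pattern:

fn first_ids(ids: &[usize]) -> impl Iterator<Item = usize> + use<'_> {
    // The returned iterator may borrow `ids` (the '_ lifetime) and nothing else.
    ids.iter().copied()
}

fn main() {
    let ids = vec![0, 2, 5];
    assert_eq!(first_ids(&ids).collect::<Vec<_>>(), vec![0, 2, 5]);
}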
impl From<Vec<usize>> for LayerIds {
     fn from(v: Vec<usize>) -> Self {
+        match v.len() {
+            0 => LayerIds::All,
+            1 => LayerIds::One(LayerId(v[0])),
+            _ => LayerIds::Multiple(v.into()),
+        }
+    }
+}
+
+impl From<Vec<LayerId>> for LayerIds {
+    fn from(v: Vec<LayerId>) -> Self {
         match v.len() {
             0 => LayerIds::All,
             1 => LayerIds::One(v[0]),
@@ -499,6 +567,16 @@ impl From<Vec<usize>> for LayerIds {
 
 impl<const N: usize> From<[usize; N]> for LayerIds {
     fn from(v: [usize; N]) -> Self {
+        match v.len() {
+            0 => LayerIds::All,
+            1 => LayerIds::One(LayerId(v[0])),
+            _ => LayerIds::Multiple(v.into_iter().collect()),
+        }
+    }
+}
+
+impl<const N: usize> From<[LayerId; N]> for LayerIds {
+    fn from(v: [LayerId; N]) -> Self {
         match v.len() {
             0 => LayerIds::All,
             1 => LayerIds::One(v[0]),
@@ -509,24 +587,30 @@ impl<const N: usize> From<[usize; N]> for LayerIds {
 
 impl From<usize> for LayerIds {
     fn from(id: usize) -> Self {
+        LayerIds::One(LayerId(id))
+    }
+}
+
+impl From<LayerId> for LayerIds {
+    fn from(id: LayerId) -> Self {
         LayerIds::One(id)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::core::entities::{EID, MAX_LAYER};
+    use crate::core::entities::{LayerId, EID, MAX_LAYER};
     use proptest::{prop_assert, prop_assert_eq, proptest};
 
     #[test]
     fn test_elid_layer() {
         proptest!(|(eid in 0..=usize::MAX, layer in 0..=MAX_LAYER)| {
-            let elid = EID(eid).with_layer(layer);
-            prop_assert_eq!(elid.layer(), layer);
+            let elid = EID(eid).with_layer(LayerId(layer));
+            prop_assert_eq!(elid.layer(), LayerId(layer));
             prop_assert!(!elid.is_deletion());
             let elid_deleted = elid.into_deletion();
-            prop_assert_eq!(elid_deleted.layer(), layer);
+            prop_assert_eq!(elid_deleted.layer(), LayerId(layer));
             prop_assert_eq!(elid_deleted.edge, EID(eid));
             prop_assert!(elid_deleted.is_deletion())
         })
@@ -535,8 +619,8 @@ mod tests {
     #[test]
     fn test_elid_deletion() {
         proptest!(|(eid in 0..=usize::MAX, layer in 0..=MAX_LAYER)| {
-            let elid = EID(eid).with_layer_deletion(layer);
-            prop_assert_eq!(elid.layer(), layer);
+            let elid = EID(eid).with_layer_deletion(LayerId(layer));
+            prop_assert_eq!(elid.layer(), LayerId(layer));
             prop_assert!(elid.is_deletion());
             prop_assert_eq!(elid, elid.into_deletion());
             prop_assert_eq!(elid.edge.0, eid);
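The proptests above exercise the `ELID` packing defined earlier in this file: `LAYER_FLAG` is only the top bit of a `usize`, so the deletion marker and the layer id share one word. A standalone sketch of the arithmetic:

const LAYER_FLAG: usize = 1usize.reverse_bits(); // just the top bit set
const MAX_LAYER: usize = usize::MAX & !LAYER_FLAG; // all remaining bits

fn main() {
    let layer = 42usize;
    let packed = layer | LAYER_FLAG; // mark as deletion
    assert_eq!(packed & !LAYER_FLAG, layer); // layer id recoverable
    assert_ne!(packed & LAYER_FLAG, 0); // deletion flag readable
    assert!(layer <= MAX_LAYER);
}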
diff --git a/raphtory-api/src/core/entities/properties/meta.rs b/raphtory-api/src/core/entities/properties/meta.rs
index b227ca1c91..6113f87596 100644
--- a/raphtory-api/src/core/entities/properties/meta.rs
+++ b/raphtory-api/src/core/entities/properties/meta.rs
@@ -1,18 +1,39 @@
-use std::{ops::Deref, sync::Arc};
-
-use parking_lot::RwLock;
-use serde::{Deserialize, Serialize};
-
 use crate::core::{
-    entities::properties::prop::{unify_types, PropError, PropType},
+    entities::{
+        properties::prop::{check_for_unification, unify_types, PropError, PropType},
+        LayerId,
+    },
     storage::{
         arc_str::ArcStr,
-        dict_mapper::{DictMapper, MaybeNew},
-        locked_vec::ArcReadLockedVec,
+        dict_mapper::{DictMapper, LockedDictMapper, MaybeNew, PublicKeys, WriteLockedDictMapper},
     },
 };
+use itertools::Either;
+use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+use rustc_hash::FxHashMap;
+use serde::{Deserialize, Serialize};
+use std::{
+    ops::{Deref, DerefMut},
+    sync::{
+        atomic::{self, AtomicUsize},
+        Arc,
+    },
+};
+
+// Internal const props for node id and type
+pub const NODE_ID_PROP_KEY: &str = "_raphtory_node_id";
+pub const NODE_ID_IDX: usize = 0;
 
-#[derive(Serialize, Deserialize, Debug)]
+pub const NODE_TYPE_PROP_KEY: &str = "_raphtory_node_type";
+pub const NODE_TYPE_IDX: usize = 1;
+
+pub const STATIC_GRAPH_LAYER: &str = "_static_graph";
+pub const STATIC_GRAPH_LAYER_ID: LayerId = LayerId(0);
+
+/// The type ID for nodes that don't have a specified type.
+pub const DEFAULT_NODE_TYPE_ID: usize = 0;
+
+#[derive(Serialize, Deserialize, Debug, Default)]
 pub struct Meta {
     temporal_prop_mapper: PropMapper,
     metadata_mapper: PropMapper,
@@ -20,19 +41,25 @@ pub struct Meta {
     node_type_mapper: DictMapper,
 }
 
-impl Default for Meta {
-    fn default() -> Self {
-        Self::new()
+impl Meta {
+    pub fn all_layer_iter(&self) -> impl Iterator<Item = (LayerId, ArcStr)> + use<'_> {
+        self.layer_mapper
+            .all_ids()
+            .map(LayerId)
+            .zip(self.layer_mapper.all_keys())
     }
-}
 
-impl Meta {
     pub fn set_metadata_mapper(&mut self, meta: PropMapper) {
         self.metadata_mapper = meta;
     }
-    pub fn set_temporal_prop_meta(&mut self, meta: PropMapper) {
+
+    pub fn set_temporal_prop_mapper(&mut self, meta: PropMapper) {
         self.temporal_prop_mapper = meta;
     }
+
+    pub fn set_layer_mapper(&mut self, meta: DictMapper) {
+        self.layer_mapper = meta;
+    }
     pub fn metadata_mapper(&self) -> &PropMapper {
         &self.metadata_mapper
     }
@@ -49,10 +76,37 @@ impl Meta {
         &self.node_type_mapper
     }
 
-    pub fn new() -> Self {
-        let meta_layer = DictMapper::default();
+    #[inline]
+    pub fn temporal_est_row_size(&self) -> usize {
+        self.temporal_prop_mapper.row_size()
+    }
+
+    #[inline]
+    pub fn const_est_row_size(&self) -> usize {
+        self.metadata_mapper.row_size()
+    }
+
+    pub fn new_for_nodes() -> Self {
+        let meta_layer = DictMapper::new_layer_mapper();
         let meta_node_type = DictMapper::default();
         meta_node_type.get_or_create_id("_default");
+
+        Self {
+            temporal_prop_mapper: PropMapper::default(),
+            metadata_mapper: PropMapper::new_with_private_fields(
+                [NODE_ID_PROP_KEY, NODE_TYPE_PROP_KEY],
+                [PropType::Empty, PropType::U64],
+            ),
+            layer_mapper: meta_layer,
+            node_type_mapper: meta_node_type, // type 0 is the default type for a node
+        }
+    }
+
+    pub fn new_for_edges() -> Self {
+        let meta_layer = DictMapper::new_layer_mapper();
+        let meta_node_type = DictMapper::default();
+        meta_node_type.get_or_create_id("_default");
+
         Self {
             temporal_prop_mapper: PropMapper::default(),
             metadata_mapper: PropMapper::default(),
@@ -61,6 +115,19 @@ impl Meta {
         }
     }
 
+    pub fn new_for_graph_props() -> Self {
+        let meta_layer = DictMapper::new_layer_mapper();
+        let meta_node_type = DictMapper::default();
+
+        // For now, only temporal and metadata mappers are used for graph metadata.
+        Self {
+            temporal_prop_mapper: PropMapper::default(),
+            metadata_mapper: PropMapper::default(),
+            layer_mapper: meta_layer,
+            node_type_mapper: meta_node_type,
+        }
+    }
+
     #[inline]
     pub fn resolve_prop_id(
         &self,
@@ -93,14 +160,15 @@ impl Meta {
     }
 
     #[inline]
-    pub fn get_or_create_layer_id(&self, name: Option<&str>) -> MaybeNew<usize> {
+    pub fn get_or_create_layer_id(&self, name: Option<&str>) -> MaybeNew<LayerId> {
         self.layer_mapper
             .get_or_create_id(name.unwrap_or("_default"))
+            .map(|l| LayerId(l))
     }
 
     #[inline]
     pub fn get_default_node_type_id(&self) -> usize {
-        0usize
+        DEFAULT_NODE_TYPE_ID
     }
 
     #[inline]
@@ -109,13 +177,13 @@ impl Meta {
     }
 
     #[inline]
-    pub fn get_layer_id(&self, name: &str) -> Option<usize> {
-        self.layer_mapper.get_id(name)
+    pub fn get_layer_id(&self, name: &str) -> Option<LayerId> {
+        self.layer_mapper.get_id(name).map(|l| LayerId(l))
     }
 
     #[inline]
-    pub fn get_default_layer_id(&self) -> Option<usize> {
-        self.layer_mapper.get_id("_default")
+    pub fn get_default_layer_id(&self) -> Option<LayerId> {
+        self.layer_mapper.get_id("_default").map(|id| LayerId(id))
     }
 
     #[inline]
@@ -123,25 +191,21 @@ impl Meta {
         self.node_type_mapper.get_id(node_type)
     }
 
-    pub fn get_layer_name_by_id(&self, id: usize) -> ArcStr {
-        self.layer_mapper.get_name(id)
+    pub fn get_layer_name_by_id(&self, id: LayerId) -> ArcStr {
+        self.layer_mapper.get_name(id.0)
     }
 
     pub fn get_node_type_name_by_id(&self, id: usize) -> Option<ArcStr> {
-        if id == 0 {
+        if id == DEFAULT_NODE_TYPE_ID {
             None
         } else {
             Some(self.node_type_mapper.get_name(id))
         }
     }
 
-    pub fn get_all_layers(&self) -> Vec<usize> {
-        self.layer_mapper.get_values()
-    }
-
     pub fn get_all_node_types(&self) -> Vec<ArcStr> {
         self.node_type_mapper
-            .get_keys()
+            .keys()
             .iter()
             .filter_map(|key| {
                 if key != "_default" {
@@ -153,11 +217,11 @@ impl Meta {
             .collect()
     }
 
-    pub fn get_all_property_names(&self, is_static: bool) -> ArcReadLockedVec<ArcStr> {
+    pub fn get_all_property_names(&self, is_static: bool) -> PublicKeys {
         if is_static {
-            self.metadata_mapper.get_keys()
+            self.metadata_mapper.keys()
         } else {
-            self.temporal_prop_mapper.get_keys()
+            self.temporal_prop_mapper.keys()
         }
     }
 
@@ -173,6 +237,7 @@ impl Meta {
 #[derive(Default, Debug, Serialize, Deserialize)]
 pub struct PropMapper {
     id_mapper: DictMapper,
+    row_size: AtomicUsize,
     dtypes: Arc<RwLock<Vec<PropType>>>,
 }
 
@@ -186,14 +251,38 @@ impl Deref for PropMapper {
     }
 }
 
 impl PropMapper {
+    pub fn new_with_private_fields(
+        fields: impl IntoIterator<Item = impl Into<ArcStr>>,
+        dtypes: impl IntoIterator<Item = PropType>,
+    ) -> Self {
+        let dtypes = Vec::from_iter(dtypes);
+        let row_size = dtypes.iter().map(|dtype| dtype.est_size()).sum();
+
+        PropMapper {
+            id_mapper: DictMapper::new_with_private_fields(fields),
+            row_size: AtomicUsize::new(row_size),
+            dtypes: Arc::new(RwLock::new(dtypes)),
+        }
+    }
+
+    pub fn d_types(&self) -> impl Deref<Target = Vec<PropType>> + '_ {
+        self.dtypes.read_recursive()
+    }
+
     pub fn deep_clone(&self) -> Self {
         let dtypes = self.dtypes.read_recursive().clone();
         Self {
             id_mapper: self.id_mapper.deep_clone(),
+            row_size: AtomicUsize::new(self.row_size.load(std::sync::atomic::Ordering::Relaxed)),
             dtypes: Arc::new(RwLock::new(dtypes)),
         }
     }
 
+    #[inline]
+    pub fn row_size(&self) -> usize {
+        self.row_size.load(atomic::Ordering::Relaxed)
+    }
+
     pub fn get_id_and_dtype(&self, prop: &str) -> Option<(usize, PropType)> {
         self.get_id(prop).map(|id| {
             let existing_dtype = self
@@ -244,6 +333,8 @@ impl PropMapper {
             None => {
                 // vector not resized yet, resize it and set the dtype and return id
                 dtype_write.resize(id + 1, PropType::Empty);
+                self.row_size
+                    .fetch_add(dtype.est_size(), atomic::Ordering::Relaxed);
                 dtype_write[id] = dtype;
                 Ok(wrapped_id)
             }
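The `row_size` counter threaded through `PropMapper` above follows a simple pattern: a relaxed `AtomicUsize` bumped by each newly registered dtype's estimated size, so callers get a cheap row-size estimate without taking the dtypes lock. A reduced sketch (the byte sizes are made-up stand-ins for `PropType::est_size`):

use std::sync::atomic::{AtomicUsize, Ordering};

struct RowSize(AtomicUsize);

impl RowSize {
    fn register_dtype(&self, est_size: usize) {
        // Relaxed is enough: this is a monotonic statistics counter,
        // not a synchronisation point.
        self.0.fetch_add(est_size, Ordering::Relaxed);
    }
    fn estimate(&self) -> usize {
        self.0.load(Ordering::Relaxed)
    }
}

fn main() {
    let rs = RowSize(AtomicUsize::new(0));
    rs.register_dtype(8); // e.g. an i64 column
    rs.register_dtype(16); // e.g. a string header (hypothetical)
    assert_eq!(rs.estimate(), 24);
}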
@@ -251,11 +342,17 @@ impl PropMapper { } pub fn set_id_and_dtype(&self, key: impl Into, id: usize, dtype: PropType) { - let mut dtypes = self.dtypes.write(); self.set_id(key, id); + self.set_dtype(id, dtype); + } + + pub fn set_dtype(&self, id: usize, dtype: PropType) { + let mut dtypes = self.dtypes.write(); if dtypes.len() <= id { dtypes.resize(id + 1, PropType::Empty); } + self.row_size + .fetch_add(dtype.est_size(), atomic::Ordering::Relaxed); dtypes[id] = dtype; } @@ -263,8 +360,159 @@ impl PropMapper { self.dtypes.read_recursive().get(prop_id).cloned() } - pub fn dtypes(&self) -> impl Deref> + '_ { - self.dtypes.read_recursive() + pub fn locked(&self) -> LockedPropMapper<'_> { + LockedPropMapper { + dict_mapper: self.id_mapper.read(), + d_types: self.dtypes.read_recursive(), + } + } + + pub fn write_locked(&self) -> WriteLockedPropMapper<'_> { + WriteLockedPropMapper { + dict_mapper: self.id_mapper.write(), + d_types: self.dtypes.write(), + } + } +} + +pub struct LockedPropMapper<'a> { + dict_mapper: LockedDictMapper<'a>, + d_types: RwLockReadGuard<'a, Vec>, +} + +pub struct WriteLockedPropMapper<'a> { + dict_mapper: WriteLockedDictMapper<'a>, + d_types: RwLockWriteGuard<'a, Vec>, +} + +impl<'a> WriteLockedPropMapper<'a> { + pub fn get_dtype(&'a self, prop_id: usize) -> Option<&'a PropType> { + self.d_types.get(prop_id) + } + + /// Fast check for property type without unifying the types + /// Returns: + /// - `Some(Either::Left(id))` if the property type can be unified + /// - `Some(Either::Right(id))` if the property type is already set and no unification is needed + /// - `None` if the property type is not set + /// - `Err(PropError::PropertyTypeError)` if the property type cannot be unified + pub fn fast_proptype_check( + &mut self, + prop: &str, + dtype: PropType, + ) -> Result>, PropError> { + fast_proptype_check(self.dict_mapper.map(), &self.d_types, prop, dtype) + } + + pub fn set_id_and_dtype(&mut self, key: impl Into, id: usize, dtype: PropType) { + self.dict_mapper.set_id(key, id); + self.set_dtype(id, dtype); + } + + pub fn set_or_unify_id_and_dtype( + &mut self, + key: impl Into, + id: usize, + dtype: PropType, + ) -> Result<(), PropError> { + self.dict_mapper.set_id(key, id); + self.set_or_unify_dtype(id, dtype) + } + + pub fn set_dtype(&mut self, id: usize, dtype: PropType) { + let dtypes = self.d_types.deref_mut(); + if dtypes.len() <= id { + dtypes.resize(id + 1, PropType::Empty); + } + dtypes[id] = dtype; + } + + pub fn set_or_unify_dtype(&mut self, id: usize, dtype: PropType) -> Result<(), PropError> { + let dtypes = self.d_types.deref_mut(); + match dtypes.get_mut(id) { + None => { + dtypes.resize(id + 1, PropType::Empty); + dtypes[id] = dtype; + } + Some(old_dtype) => { + let mut unified = false; + let unified_type = unify_types(&old_dtype, &dtype, &mut unified)?; + *old_dtype = unified_type; + } + } + Ok(()) + } + + pub fn new_id_and_dtype(&mut self, key: impl Into, dtype: PropType) -> usize { + let id = self.dict_mapper.get_or_create_id(&key.into()); + let dtypes = self.d_types.deref_mut(); + if dtypes.len() <= id.inner() { + dtypes.resize(id.inner() + 1, PropType::Empty); + } + dtypes[id.inner()] = dtype; + id.inner() + } +} + +impl<'a> LockedPropMapper<'a> { + pub fn get_id(&self, prop: &str) -> Option { + self.dict_mapper.get_id(prop) + } + + pub fn get_dtype(&'a self, prop_id: usize) -> Option<&'a PropType> { + self.d_types.get(prop_id) + } + + /// Fast check for property type without unifying the types + /// Returns: + /// - `Some(Either::Left(id))` if the 
property type can be unified + /// - `Some(Either::Right(id))` if the property type is already set and no unification is needed + /// - `None` if the property type is not set + /// - `Err(PropError::PropertyTypeError)` if the property type cannot be unified + pub fn fast_proptype_check( + &self, + prop: &str, + dtype: PropType, + ) -> Result>, PropError> { + fast_proptype_check(self.dict_mapper.map(), &self.d_types, prop, dtype) + } + + pub fn iter_ids_and_types(&self) -> impl Iterator { + self.dict_mapper + .iter_ids() + .map(move |(id, name)| (id, name, &self.d_types[id])) + } +} + +fn fast_proptype_check( + mapper: &FxHashMap, + d_types: &[PropType], + prop: &str, + dtype: PropType, +) -> Result>, PropError> { + match mapper.get(prop) { + Some(&id) => { + let existing_dtype = d_types + .get(id) + .expect("Existing id should always have a dtype"); + + let fast_check = check_for_unification(&dtype, existing_dtype); + if fast_check.is_none() { + // means nothing to do + return Ok(Some(Either::Right(id))); + } + let can_unify = fast_check.unwrap(); + if can_unify { + Ok(Some(Either::Left(id))) + } else { + Err(PropError { + name: prop.to_string(), + expected: existing_dtype.clone(), + actual: dtype, + }) + } + } + None => Ok(None), } } diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index 22bc5c1b43..f38f723042 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -1,5 +1,368 @@ -use crate::core::{ - entities::properties::{prop::Prop, prop_array::PropArray}, - PropType, +use std::borrow::Cow; + +use arrow_array::{ + cast::AsArray, types::*, Array, ArrowPrimitiveType, OffsetSizeTrait, StructArray, }; -use std::sync::Arc; +use arrow_schema::{DataType, TimeUnit}; +use chrono::DateTime; +use itertools::Itertools; +use serde::{ser::SerializeMap, Serialize}; + +use crate::core::entities::properties::prop::{Prop, PropArray, PropRef}; + +pub const EMPTY_MAP_FIELD_NAME: &str = "__empty__"; + +#[derive(Debug, Clone, Copy)] +pub struct ArrowRow<'a> { + array: &'a StructArray, + index: usize, +} + +impl<'a> Serialize for ArrowRow<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_map(Some(self.array.num_columns()))?; + for col in 0..self.array.num_columns() { + let field = &self.array.fields()[col]; + let key = field.name(); + let value = self.prop_ref(col); + state.serialize_entry(key, &value)?; + } + state.end() + } +} + +impl<'a> ArrowRow<'a> { + pub fn primitive_value(&self, col: usize) -> Option { + let primitive_array = self.array.column(col).as_primitive_opt::()?; + (primitive_array.len() > self.index && !primitive_array.is_null(self.index)) + .then(|| primitive_array.value(self.index)) + } + + fn primitive_dt(&self, col: usize) -> Option<(T::Native, &DataType)> { + let col = self.array.column(col).as_primitive_opt::()?; + (col.len() > self.index && !col.is_null(self.index)) + .then(|| (col.value(self.index), col.data_type())) + } + + fn primitive_prop(&self, col: usize) -> Option { + let (value, dt) = self.primitive_dt::(col)?; + let prop = T::prop(value, dt); + Some(prop) + } + + fn primitive_prop_ref(self, col: usize) -> Option> { + let col = self.array.column(col).as_primitive_opt::()?; + let (value, dt) = (col.len() > self.index && !col.is_null(self.index)) + .then(|| (col.value(self.index), col.data_type()))?; + let prop_ref = T::prop_ref(value, dt); + 
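Editor's aside: the guard recurring in primitive_value, primitive_dt, and primitive_prop_ref (bounds check, then null check, then value()) reads more clearly in isolation. A minimal sketch against the arrow-array crate the diff already depends on; value_at is a hypothetical helper, not part of the diff:

use arrow_array::{Array, Int64Array};

// Bounds check, null check, then value(), as in the accessors above.
fn value_at(arr: &Int64Array, i: usize) -> Option<i64> {
    (i < arr.len() && !arr.is_null(i)).then(|| arr.value(i))
}

fn main() {
    let arr = Int64Array::from(vec![Some(7), None]);
    assert_eq!(value_at(&arr, 0), Some(7));
    assert_eq!(value_at(&arr, 1), None); // null slot
    assert_eq!(value_at(&arr, 2), None); // out of bounds
}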
Some(prop_ref) + } + + fn struct_prop(&self, col: usize) -> Option { + let col = self.array.column(col).as_struct_opt()?; + let row = ArrowRow::new(col, self.index); + if col.len() > self.index && !col.is_null(self.index) { + row.into_prop() + } else { + None + } + } + + fn list_prop(&self, col: usize) -> Option { + let col = self.array.column(col).as_list_opt::()?; + let row = col.value(self.index); + if col.len() > self.index && !col.is_null(self.index) { + Some(row.into()) + } else { + None + } + } + + fn struct_prop_ref(&self, col: usize) -> Option> { + let column = self.array.column(col).as_struct_opt()?; + if self.index < column.len() && column.is_valid(self.index) { + let row = ArrowRow::new(column, self.index); + Some(PropRef::from(row)) + } else { + None + } + } + + fn list_prop_ref(&self, col: usize) -> Option> { + let column = self.array.column(col).as_list_opt::()?; + if self.index < column.len() && column.is_valid(self.index) { + let list_array = column.value(self.index); + Some(PropRef::List(Cow::Owned(PropArray::from(list_array)))) + } else { + None + } + } + + pub fn bool_value(&self, col: usize) -> Option { + let column = self.array.column(col); + match column.data_type() { + DataType::Boolean => { + let col = column.as_boolean(); + (col.len() > self.index && !col.is_null(self.index)).then(|| col.value(self.index)) + } + _ => None, + } + } + + pub fn str_value(self, col: usize) -> Option<&'a str> { + let column = self.array.column(col); + let len = column.len(); + let valid = len > self.index && !column.is_null(self.index); + match column.data_type() { + DataType::Utf8 => valid.then(|| column.as_string::().value(self.index)), + DataType::LargeUtf8 => valid.then(|| column.as_string::().value(self.index)), + DataType::Utf8View => valid.then(|| column.as_string_view().value(self.index)), + _ => None, + } + } + + pub fn prop_value(self, col: usize) -> Option { + let dtype = self.array.fields().get(col)?.data_type(); + match dtype { + DataType::Null => None, + DataType::Boolean => self.bool_value(col).map(|b| b.into()), + DataType::Int32 => self.primitive_prop::(col), + DataType::Int64 => self.primitive_prop::(col), + DataType::UInt8 => self.primitive_prop::(col), + DataType::UInt16 => self.primitive_prop::(col), + DataType::UInt32 => self.primitive_prop::(col), + DataType::UInt64 => self.primitive_prop::(col), + DataType::Float32 => self.primitive_prop::(col), + DataType::Float64 => self.primitive_prop::(col), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => self.primitive_prop::(col), + TimeUnit::Millisecond => self.primitive_prop::(col), + TimeUnit::Microsecond => self.primitive_prop::(col), + TimeUnit::Nanosecond => self.primitive_prop::(col), + }, + DataType::Date32 => self.primitive_prop::(col), + DataType::Date64 => self.primitive_prop::(col), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + self.str_value(col).map(|v| v.into()) + } + DataType::Decimal128(_, _) => self.primitive_prop::(col), + DataType::Struct(_) => self.struct_prop(col), + DataType::List(_) => self.list_prop::(col), + DataType::LargeList(_) => self.list_prop::(col), + _ => None, + } + } + + pub fn prop_ref(self, col: usize) -> Option> { + let dtype = self.array.fields().get(col)?.data_type(); + match dtype { + DataType::Null => None, + DataType::Boolean => self.bool_value(col).map(|b| b.into()), + DataType::Int32 => self.primitive_prop_ref::(col), + DataType::Int64 => self.primitive_prop_ref::(col), + DataType::UInt8 => self.primitive_prop_ref::(col), + 
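Editor's aside: prop_value and prop_ref both dispatch on the field's declared DataType and only then downcast with AsArray, as the remaining arms below continue to show. The same pattern in a self-contained form; the field names and the println are illustrative only:

use std::sync::Arc;
use arrow_array::{
    cast::AsArray, types::Int64Type, Array, ArrayRef, Int64Array, StringArray, StructArray,
};
use arrow_schema::{DataType, Field};

fn main() {
    let st = StructArray::from(vec![
        (
            Arc::new(Field::new("a", DataType::Int64, true)),
            Arc::new(Int64Array::from(vec![Some(1), None])) as ArrayRef,
        ),
        (
            Arc::new(Field::new("b", DataType::Utf8, true)),
            Arc::new(StringArray::from(vec!["x", "y"])) as ArrayRef,
        ),
    ]);
    let row = 1;
    for (field, col) in st.fields().iter().zip(st.columns()) {
        // Dispatch on the declared DataType, then downcast and null-check,
        // in the style of ArrowRow::prop_value above.
        let rendered = match field.data_type() {
            DataType::Int64 => {
                let a = col.as_primitive::<Int64Type>();
                (!a.is_null(row)).then(|| a.value(row).to_string())
            }
            DataType::Utf8 => {
                let a = col.as_string::<i32>();
                (!a.is_null(row)).then(|| a.value(row).to_string())
            }
            _ => None,
        };
        println!("{} = {:?}", field.name(), rendered); // a = None, b = Some("y")
    }
}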
DataType::UInt16 => self.primitive_prop_ref::(col), + DataType::UInt32 => self.primitive_prop_ref::(col), + DataType::UInt64 => self.primitive_prop_ref::(col), + DataType::Float32 => self.primitive_prop_ref::(col), + DataType::Float64 => self.primitive_prop_ref::(col), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => self.primitive_prop_ref::(col), + TimeUnit::Millisecond => self.primitive_prop_ref::(col), + TimeUnit::Microsecond => self.primitive_prop_ref::(col), + TimeUnit::Nanosecond => self.primitive_prop_ref::(col), + }, + DataType::Date32 => self.primitive_prop_ref::(col), + DataType::Date64 => self.primitive_prop_ref::(col), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + self.str_value(col).map(|v| v.into()) + } + DataType::Decimal128(_, _) => self.primitive_prop_ref::(col), + DataType::Struct(_) => self.struct_prop_ref(col), + DataType::LargeList(_) => self.list_prop_ref(col), + _ => None, + } + } + + pub fn into_prop(self) -> Option { + if self.index >= self.array.len() || self.array.is_null(self.index) { + None + } else { + let map = Prop::map( + self.array + .fields() + .iter() + .enumerate() + .filter_map(|(col, field)| { + Some((field.name().as_ref(), self.prop_value(col)?)) + }), + ); + Some(map) + } + } + + pub fn is_valid(&self, col: usize) -> bool { + let col = self.array.column(col); + !col.data_type().is_null() && col.is_valid(self.index) + } + + pub fn any_valid(&self) -> bool { + self.array + .columns() + .iter() + .any(|col| !col.data_type().is_null() && col.is_valid(self.index)) + } + + pub fn first_valid(&self) -> Option { + self.array + .columns() + .iter() + .find_position(|col| !col.data_type().is_null() && col.is_valid(self.index)) + .map(|(pos, _)| pos) + } +} + +impl<'a> ArrowRow<'a> { + pub fn new(array: &'a StructArray, index: usize) -> Self { + Self { array, index } + } + + pub fn get(&self, column: usize) -> Option<&T> { + self.array.column(column).as_any().downcast_ref() + } +} + +pub trait DirectConvert: ArrowPrimitiveType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static>; + fn prop(native: Self::Native, dtype: &DataType) -> Prop { + Self::prop_ref(native, dtype).into() + } +} + +impl DirectConvert for UInt8Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt16Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Int32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Int64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Float32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Float64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Date64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + 
PropRef::from(DateTime::from_timestamp_millis(native).unwrap()) + } +} + +impl DirectConvert for Date32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from( + Date32Type::to_naive_date(native) + .and_hms_opt(0, 0, 0) + .unwrap() + .and_utc(), + ) + } +} + +impl DirectConvert for TimestampNanosecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_nanos(native).naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_nanos(native)), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampMicrosecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_micros(native).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_micros(native).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampMillisecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_millis(native).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_millis(native).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampSecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp(native, 0).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp(native, 0).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for Decimal128Type { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Decimal128(_, scale) => PropRef::Decimal { + num: native, + scale: *scale as i8, + }, + _ => unreachable!(), + } + } +} diff --git a/raphtory-api/src/core/entities/properties/prop/mod.rs b/raphtory-api/src/core/entities/properties/prop/mod.rs index 3b449d5059..4f563cdf57 100644 --- a/raphtory-api/src/core/entities/properties/prop/mod.rs +++ b/raphtory-api/src/core/entities/properties/prop/mod.rs @@ -1,6 +1,8 @@ -#[cfg(feature = "arrow")] -mod prop_array; +pub mod arrow; +pub mod prop_array; +pub mod prop_col; mod prop_enum; +mod prop_ref_enum; mod prop_type; mod prop_unwrap; #[cfg(feature = "io")] @@ -9,8 +11,10 @@ mod serde; #[cfg(feature = "template")] mod template; -#[cfg(feature = "arrow")] +pub use arrow::*; + pub use prop_array::*; pub use prop_enum::*; +pub use prop_ref_enum::*; pub use prop_type::*; pub use prop_unwrap::*; diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index 8ab7ee0676..e2489cb024 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,52 +1,62 @@ use crate::{ - core::entities::properties::prop::{Prop, PropType}, + core::entities::properties::prop::{ + unify_types, ArrowRow, DirectConvert, Prop, PropType, EMPTY_MAP_FIELD_NAME, + }, iter::{BoxedLIter, IntoDynBoxed}, }; use arrow_array::{ - cast::AsArray, - types::{ - Float32Type, Float64Type, Int32Type, Int64Type, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, - }, - Array, ArrayRef, 
ArrowPrimitiveType, PrimitiveArray, RecordBatch, + cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, + RecordBatch, }; -use arrow_ipc::{reader::StreamReader, writer::StreamWriter}; -use arrow_schema::{ArrowError, DataType, Field, Fields, Schema}; -use serde::{Deserialize, Serialize, Serializer}; +use arrow_ipc::{reader::FileReader, writer::FileWriter}; +use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; +use serde::{de, ser, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, + io::Cursor, sync::Arc, }; -use thiserror::Error; -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone, derive_more::From)] pub enum PropArray { - #[default] - Empty, + Vec(Arc<[Prop]>), Array(ArrayRef), } -#[derive(Error, Debug)] -pub enum DeserialisationError { - #[error("Failed to deserialize ArrayRef")] - DeserialisationError, - #[error(transparent)] - ArrowError(#[from] ArrowError), +#[derive(Debug, Clone, Deserialize, Serialize)] +enum SerializedPropArray { + Vec(Arc<[Prop]>), + Array(Vec), +} + +impl Default for PropArray { + fn default() -> Self { + PropArray::Vec(vec![].into()) + } +} + +impl From> for PropArray { + fn from(vec: Vec) -> Self { + PropArray::Vec(Arc::from(vec)) + } } impl Hash for PropArray { fn hash(&self, state: &mut H) { - if let PropArray::Array(array) = self { - let data = array.to_data(); - let dtype = array.data_type(); - dtype.hash(state); - data.offset().hash(state); - data.len().hash(state); - for buffer in data.buffers() { - buffer.hash(state); + match self { + PropArray::Array(array) => { + let data = array.to_data(); + let dtype = array.data_type(); + dtype.hash(state); + data.offset().hash(state); + data.len().hash(state); + for buffer in data.buffers() { + buffer.hash(state); + } + } + PropArray::Vec(ps) => { + ps.hash(state); } - } else { - PropArray::Empty.hash(state); } } } @@ -55,48 +65,32 @@ impl PropArray { pub fn len(&self) -> usize { match self { PropArray::Array(arr) => arr.len(), - PropArray::Empty => 0, + PropArray::Vec(ps) => ps.len(), } } pub fn is_empty(&self) -> bool { match self { - PropArray::Empty => true, + PropArray::Vec(ps) => ps.is_empty(), PropArray::Array(arr) => arr.is_empty(), } } pub fn dtype(&self) -> PropType { match self { - PropArray::Empty => PropType::Empty, + PropArray::Vec(ps) if ps.is_empty() => PropType::Empty, + PropArray::Vec(ps) => ps + .iter() + .map(|p| p.dtype()) + .reduce(|dt1, dt2| { + unify_types(&dt1, &dt2, &mut false) + .unwrap_or_else(|e| panic!("Failed to unify props {e}")) + }) + .unwrap(), PropArray::Array(a) => PropType::from(a.data_type()), } } - pub fn to_vec_u8(&self) -> Vec { - // assuming we can allocate this can't fail - let mut bytes = vec![]; - if let PropArray::Array(value) = self { - let schema = Schema::new(vec![Field::new("data", value.data_type().clone(), true)]); - let mut writer = StreamWriter::try_new(&mut bytes, &schema).unwrap(); - let rb = RecordBatch::try_new(schema.into(), vec![value.clone()]).unwrap(); - writer.write(&rb).unwrap(); - writer.finish().unwrap(); - } - bytes - } - - pub fn from_vec_u8(bytes: &[u8]) -> Result { - if bytes.is_empty() { - return Ok(PropArray::Empty); - } - let mut reader = StreamReader::try_new(bytes, None)?; - let rb = reader - .next() - .ok_or(DeserialisationError::DeserialisationError)??; - Ok(PropArray::Array(rb.column(0).clone())) - } - pub fn into_array_ref(self) -> Option { match self { PropArray::Array(arr) => Some(arr), @@ -111,97 +105,189 @@ impl PropArray { } } - pub fn 
iter_prop(&self) -> impl Iterator + '_ { - self.iter_prop_inner().into_iter().flatten() + // TODO: need something that returns PropRef instead to avoid allocations + pub fn iter(&self) -> impl Iterator + '_ { + self.iter_all().flatten() } - fn iter_prop_inner(&self) -> Option> { - let arr = self.as_array_ref()?; + pub fn iter_all(&self) -> BoxedLIter<'_, Option> { + match self { + PropArray::Vec(ps) => ps.iter().cloned().map(Some).into_dyn_boxed(), + PropArray::Array(arr) => { + let dtype = arr.data_type(); + match dtype { + DataType::Boolean => arr + .as_boolean() + .iter() + .map(|p| p.map(Prop::Bool)) + .into_dyn_boxed(), + DataType::Int32 => as_primitive_iter::(arr), + DataType::Int64 => as_primitive_iter::(arr), + DataType::UInt8 => as_primitive_iter::(arr), + DataType::UInt16 => as_primitive_iter::(arr), + DataType::UInt32 => as_primitive_iter::(arr), + DataType::UInt64 => as_primitive_iter::(arr), + DataType::Float32 => as_primitive_iter::(arr), + DataType::Float64 => as_primitive_iter::(arr), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => as_primitive_iter::(arr), + TimeUnit::Millisecond => as_primitive_iter::(arr), + TimeUnit::Microsecond => as_primitive_iter::(arr), + TimeUnit::Nanosecond => as_primitive_iter::(arr), + }, + DataType::Date32 => as_primitive_iter::(arr), + DataType::Date64 => as_primitive_iter::(arr), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => as_str_iter(arr), + DataType::Decimal128(_, _) => as_primitive_iter::(arr), + DataType::Struct(_) => as_struct_iter(arr), + DataType::List(_) => as_list_iter::(arr), + DataType::LargeList(_) => as_list_iter::(arr), + _ => std::iter::empty().into_dyn_boxed(), + } + } + } + } +} - arr.as_primitive_opt::() - .map(|arr| { - arr.into_iter() - .map(|v| Prop::I32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::F64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::F32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::I64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U16(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U8(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) +fn as_primitive_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + arr.as_primitive_opt::() + .into_iter() + .flat_map(|primitive_array| { + let dt = arr.data_type(); + primitive_array.iter().map(|v| v.map(|v| TT::prop(v, dt))) + }) + .into_dyn_boxed() +} + +fn as_str_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + match arr.data_type() { + DataType::Utf8 => arr + .as_string::() + .into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + DataType::LargeUtf8 => arr + .as_string::() + .into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + DataType::Utf8View => arr + .as_string_view() + 
.into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + _ => panic!("as_str_iter called on non-string array"), } } +fn as_struct_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + let arr = arr.as_struct(); + (0..arr.len()) + .map(|row| (!arr.is_null(row)).then(|| ArrowRow::new(arr, row))) + .map(|arrow_row| arrow_row.and_then(|row| row.into_prop())) + .into_dyn_boxed() +} + +fn as_list_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + let arr = arr.as_list::(); + (0..arr.len()) + .map(|i| { + if arr.is_null(i) { + None + } else { + let value_array = arr.value(i); + let prop_array = PropArray::Array(value_array); + Some(Prop::List(prop_array)) + } + }) + .into_dyn_boxed() +} + impl Serialize for PropArray { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - let bytes = self.to_vec_u8(); - bytes.serialize(serializer) + let serializable = match self { + PropArray::Vec(inner) => SerializedPropArray::Vec(inner.clone()), + PropArray::Array(array) => { + let mut bytes = Vec::new(); + let cursor = Cursor::new(&mut bytes); + let schema = + Schema::new(vec![Field::new("value", array.data_type().clone(), true)]); + let mut writer = FileWriter::try_new(cursor, &schema) + .map_err(|err| ser::Error::custom(err.to_string()))?; + let batch = RecordBatch::try_new(schema.into(), vec![array.clone()]) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .write(&batch) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .finish() + .map_err(|err| ser::Error::custom(err.to_string()))?; + SerializedPropArray::Array(bytes) + } + }; + serializable.serialize(serializer) } } impl<'de> Deserialize<'de> for PropArray { fn deserialize(deserializer: D) -> Result where - D: serde::Deserializer<'de>, + D: Deserializer<'de>, { - let bytes = Vec::::deserialize(deserializer)?; - PropArray::from_vec_u8(&bytes).map_err(serde::de::Error::custom) + let data = SerializedPropArray::deserialize(deserializer)?; + let deserialized = match data { + SerializedPropArray::Vec(res) => PropArray::Vec(res), + SerializedPropArray::Array(bytes) => { + let cursor = Cursor::new(bytes); + let mut reader = FileReader::try_new(cursor, None) + .map_err(|err| de::Error::custom(err.to_string()))?; + let batch = reader.next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + let batch = batch.map_err(|err| de::Error::custom(err.to_string()))?; + let (_, arrays, _) = batch.into_parts(); + let array = arrays.into_iter().next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + PropArray::Array(array) + } + }; + Ok(deserialized) } } impl PartialEq for PropArray { fn eq(&self, other: &Self) -> bool { + self.len() == other.len() && self.iter_all().eq(other.iter_all()) + } +} + +impl PartialOrd for PropArray { + fn partial_cmp(&self, other: &Self) -> Option { match (self, other) { - (PropArray::Empty, PropArray::Empty) => true, - (PropArray::Array(a), PropArray::Array(b)) => a.eq(b), - _ => false, + (PropArray::Vec(l), PropArray::Vec(r)) => l.partial_cmp(r), + _ => { + let mut l_iter = self.iter_all(); + let mut r_iter = other.iter_all(); + loop { + match (l_iter.next(), r_iter.next()) { + (Some(lv), Some(rv)) => match lv.partial_cmp(&rv) { + Some(std::cmp::Ordering::Equal) => continue, + other => return other, + }, + (None, None) => return Some(std::cmp::Ordering::Equal), + (None, Some(_)) => return Some(std::cmp::Ordering::Less), + 
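Editor's aside: the comparison loop above, completed by the Greater arm just below, is plain lexicographic ordering over partially ordered elements, with the shorter sequence comparing Less. The same logic standalone; lex_partial_cmp is a hypothetical name:

use std::cmp::Ordering;

// Lexicographic partial_cmp over two iterators, mirroring the PartialOrd loop.
fn lex_partial_cmp<T: PartialOrd>(
    mut l: impl Iterator<Item = T>,
    mut r: impl Iterator<Item = T>,
) -> Option<Ordering> {
    loop {
        match (l.next(), r.next()) {
            (Some(a), Some(b)) => match a.partial_cmp(&b) {
                Some(Ordering::Equal) => continue,
                other => return other,
            },
            (None, None) => return Some(Ordering::Equal),
            (None, Some(_)) => return Some(Ordering::Less),
            (Some(_), None) => return Some(Ordering::Greater),
        }
    }
}

fn main() {
    assert_eq!(
        lex_partial_cmp([1, 2].into_iter(), [1, 3].into_iter()),
        Some(Ordering::Less)
    );
    // Incomparable elements (NaN) make the whole comparison incomparable.
    assert_eq!(lex_partial_cmp([1.0].into_iter(), [f64::NAN].into_iter()), None);
}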
(Some(_), None) => return Some(std::cmp::Ordering::Greater), + } + } + } } } } @@ -212,13 +298,13 @@ impl Prop { PrimitiveArray: From>, { let array = PrimitiveArray::::from(vals); - Prop::Array(PropArray::Array(Arc::new(array))) + Prop::List(PropArray::Array(Arc::new(array))) } } pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { match prop_type { - PropType::Str => DataType::LargeUtf8, + PropType::Str => DataType::Utf8View, PropType::U8 => DataType::UInt8, PropType::U16 => DataType::UInt16, PropType::I32 => DataType::Int32, @@ -232,12 +318,8 @@ pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { PropType::DTime => { DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, Some("UTC".into())) } - PropType::Array(d_type) => { - DataType::List(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) - } - PropType::List(d_type) => { - DataType::List(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) + DataType::LargeList(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) } PropType::Map(d_type) => { let fields = d_type @@ -246,7 +328,7 @@ pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { .collect::>(); if fields.is_empty() { DataType::Struct(Fields::from_iter([Field::new( - "__empty__", + EMPTY_MAP_FIELD_NAME, DataType::Null, true, )])) @@ -263,29 +345,6 @@ pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { } } -pub fn prop_type_from_arrow_dtype(arrow_dtype: &DataType) -> PropType { - match arrow_dtype { - DataType::LargeUtf8 | DataType::Utf8 | DataType::Utf8View => PropType::Str, - DataType::UInt8 => PropType::U8, - DataType::UInt16 => PropType::U16, - DataType::Int32 => PropType::I32, - DataType::Int64 => PropType::I64, - DataType::UInt32 => PropType::U32, - DataType::UInt64 => PropType::U64, - DataType::Float32 => PropType::F32, - DataType::Float64 => PropType::F64, - DataType::Boolean => PropType::Bool, - DataType::Decimal128(_, scale) => PropType::Decimal { - scale: *scale as i64, - }, - DataType::List(field) => { - let d_type = field.data_type(); - PropType::Array(Box::new(prop_type_from_arrow_dtype(d_type))) - } - _ => panic!("{:?} not supported as disk_graph property", arrow_dtype), - } -} - pub trait PropArrayUnwrap: Sized { fn into_array(self) -> Option; fn unwrap_array(self) -> ArrayRef { @@ -301,10 +360,35 @@ impl PropArrayUnwrap for Option
<V>
{ impl PropArrayUnwrap for Prop { fn into_array(self) -> Option { - if let Prop::Array(v) = self { + if let Prop::List(v) = self { v.into_array_ref() } else { None } } } + +#[cfg(test)] +mod test { + use crate::core::entities::properties::prop::{Prop, PropArray}; + use arrow_array::Int64Array; + use std::sync::Arc; + + #[test] + fn test_prop_array_json() { + let array = PropArray::Array(Arc::new(Int64Array::from(vec![0, 1, 2]))); + let json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } + + #[test] + fn test_prop_array_list_json() { + let array = PropArray::Vec([Prop::U64(1), Prop::U64(2)].into()); + let json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } +} diff --git a/raphtory-api/src/core/entities/properties/prop/prop_col.rs b/raphtory-api/src/core/entities/properties/prop/prop_col.rs new file mode 100644 index 0000000000..aaff467cca --- /dev/null +++ b/raphtory-api/src/core/entities/properties/prop/prop_col.rs @@ -0,0 +1,622 @@ +use crate::{ + core::{ + entities::properties::prop::{IntoPropList, Prop, PropArray, PropMapRef, PropNum, PropRef}, + storage::arc_str::ArcStr, + }, + iter::IntoDynBoxed, +}; +use arrow_array::{ + cast::AsArray, + types::{ + Date32Type, Date64Type, Decimal128Type, Float32Type, Float64Type, Int32Type, Int64Type, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + }, + Array, ArrayRef, ArrowPrimitiveType, BooleanArray, Decimal128Array, FixedSizeListArray, + GenericListArray, GenericStringArray, NullArray, OffsetSizeTrait, PrimitiveArray, + StringViewArray, StructArray, +}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field, TimeUnit}; +use bigdecimal::BigDecimal; +use chrono::{DateTime, Utc}; +use rustc_hash::FxHashMap; +use std::{borrow::Cow, sync::Arc}; + +pub trait PropCol: Send + Sync + std::fmt::Debug { + fn get(&self, i: usize) -> Option; + + fn get_ref(&self, i: usize) -> Option>; + + fn as_array(&self) -> ArrayRef; + + fn iter(&self) -> Box> + '_> { + (0..self.as_array().len()) + .map(move |i| self.get(i)) + .into_dyn_boxed() + } + + fn iter_ref(&self) -> Box>> + '_> { + (0..self.as_array().len()) + .map(move |i| self.get_ref(i)) + .into_dyn_boxed() + } +} + +#[derive(Debug)] +pub struct MapCol { + validity: Option, + values: Vec<(String, Box)>, +} + +impl MapCol { + fn new(arr: &StructArray) -> Self { + let validity = arr.nulls().cloned(); + let values = arr + .fields() + .iter() + .zip(arr.columns()) + .map(|(field, col)| (field.name().clone(), lift_property_col(col.as_ref()))) + .collect(); + Self { validity, values } + } +} +impl PropCol for MapCol { + fn get(&self, i: usize) -> Option { + if self + .validity + .as_ref() + .is_none_or(|validity| validity.is_valid(i)) + { + Some(Prop::map(self.values.iter().filter_map(|(field, col)| { + Some((field.as_str(), col.get(i)?)) + }))) + } else { + None + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self + .validity + .as_ref() + .is_none_or(|validity| validity.is_valid(i)) + { + Some(PropRef::Map(PropMapRef::PropCol { map: self, i })) + } else { + None + } + } + + fn as_array(&self) -> ArrayRef { + let fields = self + .values + .iter() + .map(|(name, col)| Field::new(name, col.as_array().data_type().clone(), true)) + .collect::>(); + let columns = 
self.values.iter().map(|(_, col)| col.as_array()).collect(); + Arc::new(StructArray::new( + fields.into(), + columns, + self.validity.clone(), + )) + } +} + +impl PropCol for BooleanArray { + fn get(&self, i: usize) -> Option { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(Prop::Bool(self.value(i))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::Bool(self.value(i))) + } + } + + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } + + fn iter(&self) -> Box> + '_> { + self.iter().map(|opt| opt.map(Prop::Bool)).into_dyn_boxed() + } +} + +impl PropCol for PrimitiveArray +where + T::Native: Into + Into, +{ + fn get(&self, i: usize) -> Option { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(self.value(i).into()) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::Num(self.value(i).into())) + } + } + + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } + + fn iter(&self) -> Box> + '_> { + self.iter() + .map(|opt| opt.map(|v| v.into())) + .into_dyn_boxed() + } +} + +impl PropCol for GenericStringArray { + fn get(&self, i: usize) -> Option { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(Prop::str(self.value(i))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::Str(self.value(i))) + } + } + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } + + fn iter(&self) -> Box> + '_> { + self.iter().map(|opt| opt.map(Prop::str)).into_dyn_boxed() + } +} + +impl PropCol for StringViewArray { + fn get(&self, i: usize) -> Option { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(Prop::str(self.value(i))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::Str(self.value(i))) + } + } + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } + + fn iter(&self) -> Box> + '_> { + self.iter().map(|opt| opt.map(Prop::str)).into_dyn_boxed() + } +} + +impl PropCol for GenericListArray { + fn get(&self, i: usize) -> Option { + if i >= self.len() || self.is_null(i) { + None + } else { + Some(arr_as_prop(self.value(i))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::List(Cow::Owned(self.value(i).into()))) + } + } + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } +} + +impl PropCol for FixedSizeListArray { + fn get(&self, i: usize) -> Option { + if i >= self.len() || self.is_null(i) { + None + } else { + Some(arr_as_prop(self.value(i))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if self.is_null(i) || self.len() <= i { + None + } else { + Some(PropRef::List(Cow::Owned(self.value(i).into()))) + } + } + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } +} + +impl PropCol for NullArray { + fn get(&self, _i: usize) -> Option { + None + } + + fn get_ref(&self, _i: usize) -> Option> { + None + } + fn as_array(&self) -> ArrayRef { + Arc::new(self.clone()) + } +} + +#[derive(Debug)] +struct MappedPrimitiveCol { + arr: PrimitiveArray, + map: fn(T::Native) -> PropRef<'static>, +} + +impl PropCol for MappedPrimitiveCol { + fn get(&self, i: usize) -> Option { + self.get_ref(i).map(|p_ref| p_ref.into()) + } + + fn get_ref(&self, i: usize) -> Option> { + if i >= self.arr.len() || 
self.arr.is_null(i) { + None + } else { + Some((self.map)(self.arr.value(i))) + } + } + + fn as_array(&self) -> ArrayRef { + Arc::new(self.arr.clone()) + } +} + +#[derive(Debug)] +struct DecimalPropCol { + arr: Decimal128Array, + scale: i64, +} + +impl PropCol for DecimalPropCol { + fn get(&self, i: usize) -> Option { + if i >= self.arr.len() || self.arr.is_null(i) { + None + } else { + Some(Prop::Decimal(BigDecimal::new( + self.arr.value(i).into(), + self.scale, + ))) + } + } + + fn get_ref(&self, i: usize) -> Option> { + if i >= self.arr.len() || self.arr.is_null(i) { + None + } else { + Some(PropRef::Decimal { + num: self.arr.value(i).into(), + scale: self.scale as i8, + }) + } + } + + fn as_array(&self) -> ArrayRef { + Arc::new(self.arr.clone()) + } +} + +#[derive(Debug)] +struct EmptyCol; + +impl PropCol for EmptyCol { + fn get(&self, _i: usize) -> Option { + None + } + + fn get_ref(&self, _i: usize) -> Option> { + None + } + + fn as_array(&self) -> ArrayRef { + Arc::new(NullArray::new(0)) + } +} +pub fn lift_property_col(arr: &dyn Array) -> Box { + match arr.data_type() { + DataType::Boolean => Box::new(arr.as_boolean().clone()), + DataType::Int32 => Box::new(arr.as_primitive::().clone()), + DataType::Int64 => Box::new(arr.as_primitive::().clone()), + DataType::UInt8 => Box::new(arr.as_primitive::().clone()), + DataType::UInt16 => Box::new(arr.as_primitive::().clone()), + DataType::UInt32 => Box::new(arr.as_primitive::().clone()), + DataType::UInt64 => Box::new(arr.as_primitive::().clone()), + DataType::Float32 => Box::new(arr.as_primitive::().clone()), + DataType::Float64 => Box::new(arr.as_primitive::().clone()), + DataType::Utf8 => Box::new(arr.as_string::().clone()), + DataType::LargeUtf8 => Box::new(arr.as_string::().clone()), + DataType::Utf8View => Box::new(arr.as_string_view().clone()), + DataType::List(_) => Box::new(arr.as_list::().clone()), + DataType::LargeList(_) => Box::new(arr.as_list::().clone()), + DataType::FixedSizeList(_, _) => Box::new(arr.as_fixed_size_list().clone()), + DataType::Struct(_) => Box::new(MapCol::new(arr.as_struct())), + DataType::Timestamp(timeunit, timezone) => match timezone { + Some(_) => match timeunit { + TimeUnit::Second => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::DTime( + DateTime::::from_timestamp(v, 0) + .expect("DateTime conversion failed"), + ) + }, + }), + TimeUnit::Millisecond => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::DTime( + DateTime::::from_timestamp_millis(v) + .expect("DateTime conversion failed"), + ) + }, + }), + TimeUnit::Microsecond => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::DTime( + DateTime::::from_timestamp_micros(v) + .expect("DateTime conversion failed"), + ) + }, + }), + TimeUnit::Nanosecond => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| PropRef::DTime(DateTime::::from_timestamp_nanos(v)), + }), + }, + None => match timeunit { + TimeUnit::Second => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::NDTime( + DateTime::from_timestamp(v, 0) + .expect("DateTime conversion failed") + .naive_utc(), + ) + }, + }), + TimeUnit::Millisecond => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::NDTime( + DateTime::from_timestamp_millis(v) + .expect("DateTime conversion failed") + .naive_utc(), + ) + }, + }), + TimeUnit::Microsecond => Box::new(MappedPrimitiveCol { + 
arr: arr.as_primitive::().clone(), + map: |v| { + PropRef::NDTime( + DateTime::from_timestamp_micros(v) + .expect("DateTime conversion failed") + .naive_utc(), + ) + }, + }), + TimeUnit::Nanosecond => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |v| PropRef::NDTime(DateTime::from_timestamp_nanos(v).naive_utc()), + }), + }, + }, + DataType::Date32 => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |days| { + let ms = (days as i64) * 86_400_000; // convert days to ms + PropRef::NDTime( + DateTime::from_timestamp_millis(ms) + .expect("DateTime conversion failed for Date32 type") + .naive_utc(), + ) + }, + }), + DataType::Date64 => Box::new(MappedPrimitiveCol { + arr: arr.as_primitive::().clone(), + map: |ms| { + PropRef::NDTime( + DateTime::from_timestamp_millis(ms) + .expect("DateTime conversion failed for Date64 type") + .naive_utc(), + ) + }, + }), + DataType::Decimal128(precision, scale) if *precision <= 38 => { + let arr = arr.as_primitive::().clone(); + Box::new(DecimalPropCol { + arr, + scale: *scale as i64, + }) + } + DataType::Null => Box::new(EmptyCol), + + unsupported => panic!("Data type not supported: {:?}", unsupported), + } +} +fn arr_as_prop(arr: ArrayRef) -> Prop { + match arr.data_type() { + DataType::Boolean => { + let arr = arr.as_boolean(); + arr.iter().flatten().into_prop_list() + } + DataType::Int32 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::Int64 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::UInt8 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::UInt16 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::UInt32 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::UInt64 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::Float32 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::Float64 => { + let arr = arr.as_primitive::(); + arr.iter().flatten().into_prop_list() + } + DataType::Utf8 => { + let arr = arr.as_string::(); + arr.iter().flatten().into_prop_list() + } + DataType::LargeUtf8 => { + let arr = arr.as_string::(); + arr.iter().flatten().into_prop_list() + } + DataType::Utf8View => { + let arr = arr.as_string_view(); + arr.iter().flatten().into_prop_list() + } + DataType::List(_) => { + let arr = arr.as_list::(); + arr.iter().flatten().map(arr_as_prop).into_prop_list() + } + DataType::FixedSizeList(_, _) => { + let arr = arr.as_fixed_size_list(); + arr.iter().flatten().map(arr_as_prop).into_prop_list() + } + DataType::LargeList(_) => { + let arr = arr.as_list::(); + arr.iter().flatten().map(arr_as_prop).into_prop_list() + } + DataType::Timestamp(TimeUnit::Second, tz) => { + let map_fn = if tz.is_some() { + |elem: i64| Prop::DTime(DateTime::::from_timestamp_secs(elem).unwrap()) + } else { + |elem: i64| Prop::NDTime(DateTime::from_timestamp_secs(elem).unwrap().naive_utc()) + }; + let arr = arr.as_primitive::(); + arr.iter().flatten().map(map_fn).into_prop_list() + } + DataType::Timestamp(TimeUnit::Millisecond, tz) => { + let map_fn = if tz.is_some() { + |elem: i64| Prop::DTime(DateTime::::from_timestamp_millis(elem).unwrap()) + } else { + |elem: i64| Prop::NDTime(DateTime::from_timestamp_millis(elem).unwrap().naive_utc()) + }; + let arr = arr.as_primitive::(); + 
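Editor's aside: these TimeUnit arms, like the MappedPrimitiveCol variants earlier in the file, all funnel through chrono's from_timestamp_* constructors, which return None for out-of-range inputs, so the unwrap/expect calls in the diff assume in-range data. A quick check of the millisecond and Date32-style conversions; the values are arbitrary:

use chrono::{DateTime, Utc};

fn main() {
    // Millisecond arm: None only for out-of-range inputs.
    let ms: i64 = 86_400_000; // one day after the epoch
    let dt: DateTime<Utc> = DateTime::from_timestamp_millis(ms).unwrap();
    assert_eq!(dt.naive_utc().to_string(), "1970-01-02 00:00:00");

    // Date32 stores days since the epoch; the diff converts via days * 86_400_000 ms.
    let days: i32 = 1;
    let from_days = DateTime::from_timestamp_millis((days as i64) * 86_400_000).unwrap();
    assert_eq!(from_days, dt);
}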
arr.iter().flatten().map(map_fn).into_prop_list() + } + DataType::Timestamp(TimeUnit::Microsecond, tz) => { + let map_fn = if tz.is_some() { + |elem: i64| Prop::DTime(DateTime::::from_timestamp_micros(elem).unwrap()) + } else { + |elem: i64| Prop::NDTime(DateTime::from_timestamp_micros(elem).unwrap().naive_utc()) + }; + let arr = arr.as_primitive::(); + arr.iter().flatten().map(map_fn).into_prop_list() + } + DataType::Timestamp(TimeUnit::Nanosecond, tz) => { + let map_fn = if tz.is_some() { + |elem: i64| Prop::DTime(DateTime::::from_timestamp_nanos(elem)) + } else { + |elem: i64| Prop::NDTime(DateTime::from_timestamp_nanos(elem).naive_utc()) + }; + let arr = arr.as_primitive::(); + arr.iter().flatten().map(map_fn).into_prop_list() + } + DataType::Date32 => { + let arr = arr.as_primitive::(); + arr.iter() + .flatten() + .map(|days| { + let ms = (days as i64) * 86_400_000; + Prop::NDTime( + DateTime::from_timestamp_millis(ms) + .expect("DateTime conversion failed for Date32 type") + .naive_utc(), + ) + }) + .into_prop_list() + } + DataType::Date64 => { + let arr = arr.as_primitive::(); + arr.iter() + .flatten() + .map(|ms| { + Prop::NDTime( + DateTime::from_timestamp_millis(ms) + .expect("DateTime conversion failed for Date64 type") + .naive_utc(), + ) + }) + .into_prop_list() + } + DataType::Struct(_) => { + let arr = arr.as_struct(); + let cols = arr + .columns() + .iter() + .map(|arr| lift_property_col(arr.as_ref())) + .collect::>(); + + let mut props = Vec::with_capacity(arr.len()); + for i in 0..arr.len() { + let fields = cols + .iter() + .zip(arr.fields()) + .filter_map(|(col, field)| { + col.get(i) + .map(|prop| (ArcStr::from(field.name().as_str()), prop)) + }) + .collect::>(); + props.push(Prop::Map(fields.into())); + } + + props.into_prop_list() + } + DataType::Decimal128(precision, scale) if *precision <= 38 => { + let arr = arr.as_primitive::(); + arr.iter() + .flatten() + .map(|elem| Prop::Decimal(BigDecimal::new(elem.into(), *scale as i64))) + .into_prop_list() + } + DataType::Null => Prop::List(PropArray::default()), + dt => panic!("Data type not recognized {dt:?}"), + } +} diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 9fcae4b9a5..05ac89825d 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -1,24 +1,40 @@ -use crate::core::{entities::properties::prop::PropType, storage::arc_str::ArcStr}; +use crate::core::{ + entities::{ + properties::prop::{prop_array::*, prop_ref_enum::PropRef, ArrowRow, PropNum, PropType}, + GidRef, + }, + storage::arc_str::ArcStr, +}; +use arrow_array::{ + cast::AsArray, + types::{ + Date32Type, Date64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, Int32Type, + Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + }, + Array, ArrayRef, LargeListArray, StructArray, +}; +use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; use num_traits::{Bounded, FromPrimitive, ToPrimitive}; -use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use serde::{ + ser::{Error, SerializeMap, SerializeSeq}, + Deserialize, Serialize, Serializer, +}; use std::{ cmp::Ordering, collections::HashMap, fmt, - 
fmt::{Display, Error, Formatter}, - hash::{Hash, Hasher}, + fmt::{Display, Formatter}, + hash::{DefaultHasher, Hash, Hasher}, + num::Wrapping, sync::Arc, }; use thiserror::Error; -#[cfg(feature = "arrow")] -use crate::core::entities::properties::prop::prop_array::*; -use crate::core::entities::properties::prop::unify_types; - pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) #[derive(Error, Debug)] @@ -43,9 +59,7 @@ enum PropUntaggedDef { F64(f64), F32(f32), Bool(bool), - #[cfg(feature = "arrow")] - Array(PropArray), - List(Arc>), + List(PropArray), Map(Arc>), NDTime(NaiveDateTime), DTime(DateTime), @@ -101,9 +115,7 @@ impl<'de> Deserialize<'de> for PropUntagged { PropUntaggedHelper::F64(v) => Prop::F64(v), PropUntaggedHelper::F32(v) => Prop::F32(v), PropUntaggedHelper::Str(v) => Prop::Str(v), - PropUntaggedHelper::List(v) => { - Prop::List(Arc::new(v.into_iter().map(|p| p.0).collect())) - } + PropUntaggedHelper::List(v) => Prop::list(v), PropUntaggedHelper::Map(v) => { Prop::Map(Arc::new(v.into_iter().map(|(k, p)| (k, p.0)).collect())) } @@ -125,7 +137,7 @@ impl PartialEq for PropUntagged { } /// Denotes the types of properties allowed to be stored in the graph. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, derive_more::From)] pub enum Prop { Str(ArcStr), U8(u8), @@ -137,15 +149,50 @@ pub enum Prop { F64(f64), F32(f32), Bool(bool), - #[cfg(feature = "arrow")] - Array(PropArray), - List(Arc>), + List(PropArray), Map(Arc>), NDTime(NaiveDateTime), DTime(DateTime), Decimal(BigDecimal), } +impl From> for Prop { + fn from(value: GidRef<'_>) -> Self { + match value { + GidRef::U64(n) => Prop::U64(n), + GidRef::Str(s) => Prop::str(s), + } + } +} + +impl<'a> From> for Prop { + fn from(value: PropRef<'a>) -> Self { + match value { + PropRef::Str(s) => Prop::Str(s.into()), + PropRef::Num(n) => match n { + PropNum::U8(u) => Prop::U8(u), + PropNum::U16(u) => Prop::U16(u), + PropNum::I32(i) => Prop::I32(i), + PropNum::I64(i) => Prop::I64(i), + PropNum::U32(u) => Prop::U32(u), + PropNum::U64(u) => Prop::U64(u), + PropNum::F32(f) => Prop::F32(f), + PropNum::F64(f) => Prop::F64(f), + }, + PropRef::Bool(b) => Prop::Bool(b), + PropRef::List(v) => Prop::List(v.as_ref().clone()), + PropRef::Map(m) => m + .into_prop() + .unwrap_or_else(|| Prop::Map(Arc::new(Default::default()))), + PropRef::NDTime(dt) => Prop::NDTime(dt), + PropRef::DTime(dt) => Prop::DTime(dt), + PropRef::Decimal { num, scale } => { + Prop::Decimal(BigDecimal::from_bigint(num.into(), scale as i64)) + } + } + } +} + impl Hash for Prop { fn hash(&self, state: &mut H) { match self { @@ -166,8 +213,6 @@ impl Hash for Prop { } Prop::Bool(b) => b.hash(state), Prop::NDTime(dt) => dt.hash(state), - #[cfg(feature = "arrow")] - Prop::Array(b) => b.hash(state), Prop::DTime(dt) => dt.hash(state), Prop::List(v) => { for prop in v.iter() { @@ -175,10 +220,20 @@ impl Hash for Prop { } } Prop::Map(m) => { - for (key, prop) in m.iter() { - key.hash(state); - prop.hash(state); + // Based on python set hash + let mut hash = Wrapping(1927868237u64); + hash *= (m.len() as u64).wrapping_add(1); + for v in m.iter() { + let mut inner_hasher = DefaultHasher::new(); + v.hash(&mut inner_hasher); + let inner_hash = Wrapping(inner_hasher.finish()); + hash ^= (inner_hash ^ (inner_hash << 16) ^ Wrapping(89869747u64)) + * Wrapping(3644798167u64); } + hash ^= (hash >> 11) ^ (hash >> 25); + hash *= 69069; + hash += 907133923; + 
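Editor's aside: the accumulation above, finished by write_u64 just below, is the Python frozenset scheme the comment cites: each entry is hashed with a fresh hasher and folded in with XOR, so the result does not depend on HashMap iteration order. A standalone sketch; map_hash is a hypothetical helper and the constants are copied from the diff:

use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::num::Wrapping;

fn map_hash<K: Hash, V: Hash>(m: &HashMap<K, V>) -> u64 {
    let mut hash = Wrapping(1927868237u64);
    hash *= Wrapping((m.len() as u64).wrapping_add(1));
    for entry in m.iter() {
        // Hash each entry in isolation, then fold in with XOR: XOR is
        // commutative, which is what makes the result order-independent.
        let mut h = DefaultHasher::new();
        entry.hash(&mut h);
        let inner = Wrapping(h.finish());
        hash ^= (inner ^ (inner << 16) ^ Wrapping(89869747u64)) * Wrapping(3644798167u64);
    }
    hash ^= (hash >> 11) ^ (hash >> 25);
    hash *= Wrapping(69069u64);
    hash += Wrapping(907133923u64);
    hash.0
}

fn main() {
    let a: HashMap<&str, i32> = [("x", 1), ("y", 2)].into_iter().collect();
    let b: HashMap<&str, i32> = [("y", 2), ("x", 1)].into_iter().collect();
    assert_eq!(map_hash(&a), map_hash(&b)); // insertion order does not matter
}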
state.write_u64(hash.0); } Prop::Decimal(d) => d.hash(state), } @@ -209,150 +264,289 @@ impl PartialOrd for Prop { } } -pub fn validate_prop(prop: Prop) -> Result { - match prop { - Prop::Decimal(ref bd) => { - let (bint, scale) = bd.as_bigint_and_exponent(); - if bint <= BigInt::from(DECIMAL_MAX) && scale <= 38 { - Ok(prop) - } else { - Err(InvalidBigDecimal(bd.clone())) +pub struct SerdeArrowProp<'a>(pub &'a Prop); +#[derive(Clone, Copy, Debug)] +pub struct SerdeArrowList<'a>(pub &'a PropArray); + +#[derive(Clone, Copy, Debug)] +pub struct SerdeArrowArray<'a>(pub &'a ArrayRef); +#[derive(Clone, Copy)] +pub struct SerdeArrowMap<'a>(pub &'a HashMap); + +#[derive(Clone, Copy, Serialize)] +pub struct SerdeRow { + value: Option
<T>
, +} + +impl<'a> Serialize for SerdeArrowList<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match &self.0 { + PropArray::Vec(list) => { + let mut state = serializer.serialize_seq(Some(self.0.len()))?; + for prop in list.iter() { + state.serialize_element(&SerdeArrowProp(prop))?; + } + state.end() } + PropArray::Array(array) => SerdeArrowArray(array).serialize(serializer), } - _ => Ok(prop), } } -// auxiliary function to help with numerical conversion -fn float_to_int(val: f64) -> Result -where - T: FromPrimitive + Bounded + ToPrimitive, -{ - if val.is_nan() { - return Err("Cannot convert NaN to integer".into()); +impl<'a> Serialize for SerdeArrowMap<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut state = serializer.serialize_map(Some(self.0.len()))?; + for (k, v) in self.0.iter() { + state.serialize_entry(k, &SerdeArrowProp(v))?; + } + state.end() } +} - if val.is_infinite() { - return Err("Cannot convert infinite value to integer".into()); +impl<'a> Serialize for SerdeArrowProp<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self.0 { + Prop::I32(i) => serializer.serialize_i32(*i), + Prop::I64(i) => serializer.serialize_i64(*i), + Prop::F32(f) => serializer.serialize_f32(*f), + Prop::F64(f) => serializer.serialize_f64(*f), + Prop::U8(u) => serializer.serialize_u8(*u), + Prop::U16(u) => serializer.serialize_u16(*u), + Prop::U32(u) => serializer.serialize_u32(*u), + Prop::U64(u) => serializer.serialize_u64(*u), + Prop::Str(s) => serializer.serialize_str(s), + Prop::Bool(b) => serializer.serialize_bool(*b), + Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), + Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), + Prop::List(l) => SerdeArrowList(l).serialize(serializer), + Prop::Map(m) => SerdeArrowMap(m).serialize(serializer), + Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), + } } - - // Try to convert using num_traits - T::from_f64(val).ok_or_else(|| format!("Value is out of bounds for target type: {}", val)) } -impl Prop { - // auxiliary function to help with numerical conversion - fn try_into_int(self) -> Result +impl<'a> Serialize for SerdeArrowArray<'a> { + fn serialize(&self, serializer: S) -> Result where - T: FromPrimitive + Bounded, + S: Serializer, { - match self { - Prop::U8(v) => T::from_u8(v).ok_or(Error), - Prop::U16(v) => T::from_u16(v).ok_or(Error), - Prop::I32(v) => T::from_i32(v).ok_or(Error), - Prop::I64(v) => T::from_i64(v).ok_or(Error), - Prop::U32(v) => T::from_u32(v).ok_or(Error), - Prop::U64(v) => T::from_u64(v).ok_or(Error), - Prop::F32(v) => { - let as_f64 = v as f64; - float_to_int::(as_f64) - .map_err(|_| Error) - .and_then(|i| T::from_i64(i).ok_or(Error)) + let dtype = self.0.data_type(); + let len = self.0.len(); + let mut state = serializer.serialize_seq(Some(len))?; + match dtype { + DataType::Boolean => { + for v in self.0.as_boolean().iter() { + state.serialize_element(&v)?; + } + } + DataType::Int32 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Int64 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } } - Prop::F64(v) => float_to_int::(v) - .map_err(|_| Error) - .and_then(|i| T::from_i64(i).ok_or(Error)), - _ => Err(Error), + DataType::UInt8 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt16 => { + for v in 
self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt32 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt64 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Float32 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Float64 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Millisecond => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Microsecond => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Nanosecond => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + }, + DataType::Date32 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Date64 => { + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Utf8 => { + for v in self.0.as_string::().iter() { + state.serialize_element(&v)?; + } + } + DataType::LargeUtf8 => { + for v in self.0.as_string::().iter() { + state.serialize_element(&v)?; + } + } + DataType::Utf8View => { + for v in self.0.as_string_view().iter() { + state.serialize_element(&v)?; + } + } + DataType::Decimal128(precision, scale) => { + for v in self.0.as_primitive::().iter() { + let element = v.map(|v| Decimal128Type::format_decimal(v, *precision, *scale)); + state.serialize_element(&element)? + // i128 not supported by serde_arrow! 
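Editor's aside: because serde offers no i128 support (per the comment above), decimal values are rendered through arrow-array's DecimalType::format_decimal, which applies the scale and produces a plain string. A quick check of what it yields:

use arrow_array::types::{Decimal128Type, DecimalType};

fn main() {
    // Mantissa 12345 with precision 10 and scale 2 reads back as 123.45.
    let s = Decimal128Type::format_decimal(12345i128, 10, 2);
    assert_eq!(s, "123.45");
}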
+ } + } + DataType::Struct(_) => { + let struct_array = self.0.as_struct(); + match struct_array.nulls() { + None => { + for i in 0..struct_array.len() { + state.serialize_element(&ArrowRow::new(struct_array, i))?; + } + } + Some(nulls) => { + for (i, is_valid) in nulls.iter().enumerate() { + state.serialize_element( + &is_valid.then_some(ArrowRow::new(struct_array, i)), + )?; + } + } + } + } + DataType::List(_) => { + let list = self.0.as_list::(); + for array in list.iter() { + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; + } + } + DataType::LargeList(_) => { + let list = self.0.as_list::(); + for array in list.iter() { + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; + } + } + DataType::Null => { + for _ in 0..self.0.len() { + state.serialize_element(&None::<()>)?; + } + } + dtype => Err(Error::custom(format!("unsuported data type {dtype:?}")))?, } + state.end() } +} - // auxiliary function to help with numerical conversion - fn into_f64(self) -> Result { - let result = match self { - Prop::U8(v) => v.to_f64(), - Prop::U16(v) => v.to_f64(), - Prop::I32(v) => v.to_f64(), - Prop::I64(v) => v.to_f64(), - Prop::U32(v) => v.to_f64(), - Prop::U64(v) => v.to_f64(), - Prop::F32(v) => v.to_f64(), - Prop::F64(v) => Some(v), - _ => None, - }; - result.ok_or(Error) +pub fn validate_bd(bd: &BigDecimal) -> Result<(), InvalidBigDecimal> { + let (bint, scale) = bd.as_bigint_and_exponent(); + if bint <= BigInt::from(DECIMAL_MAX) && scale <= 38 { + Ok(()) + } else { + Err(InvalidBigDecimal(bd.clone())) } +} +impl Prop { // auxiliary function to help with numerical conversion - fn try_into_f32(self) -> Result { - let as_f32 = match self { - Prop::U8(v) => v.to_f32(), - Prop::U16(v) => v.to_f32(), - Prop::I32(v) => v.to_f32(), - Prop::I64(v) => v.to_f32(), - Prop::U32(v) => v.to_f32(), - Prop::U64(v) => v.to_f32(), - Prop::F32(v) => Some(v), - Prop::F64(v) => { - // Check if f64 value fits in f32 range - if v.is_finite() - && v.abs() <= f32::MAX as f64 - && (v == 0.0 || v.abs() >= f32::MIN_POSITIVE as f64) - { - Some(v as f32) - } else if v.is_nan() || v.is_infinite() { - Some(v as f32) // Preserve NaN and infinity - } else { - None - } - } + pub fn cast_num(self) -> Option + where + T: FromPrimitive + Bounded, + { + match self { + Prop::U8(v) => T::from_u8(v), + Prop::U16(v) => T::from_u16(v), + Prop::I32(v) => T::from_i32(v), + Prop::I64(v) => T::from_i64(v), + Prop::U32(v) => T::from_u32(v), + Prop::U64(v) => T::from_u64(v), + Prop::F32(v) => T::from_f32(v), + Prop::F64(v) => T::from_f64(v), _ => None, - }; - as_f32.map(Prop::F32).ok_or(Error) + } } - // convert prop into another prop type (primarily for numerical conversions) - pub fn try_cast(self, prop_type: PropType) -> Result { + /// convert prop into another prop type (primarily for numerical conversions) + pub fn try_cast(self, prop_type: PropType) -> Option { // Early return if casting to the same type if self.dtype() == prop_type { - return Ok(self); + return Some(self); } match self { Prop::Str(v) => match prop_type { - PropType::Str => Ok(Prop::Str(v)), - PropType::U8 => v.parse::().map(Prop::U8).map_err(|_| Error), - PropType::U16 => v.parse::().map(Prop::U16).map_err(|_| Error), - PropType::I32 => v.parse::().map(Prop::I32).map_err(|_| Error), - PropType::I64 => v.parse::().map(Prop::I64).map_err(|_| Error), - PropType::U32 => v.parse::().map(Prop::U32).map_err(|_| Error), - PropType::U64 => v.parse::().map(Prop::U64).map_err(|_| Error), - PropType::F32 => v.parse::().map(Prop::F32).map_err(|_| Error), - 
PropType::F64 => v.parse::().map(Prop::F64).map_err(|_| Error), - PropType::Bool => v.parse::().map(Prop::Bool).map_err(|_| Error), - PropType::NDTime => v - .parse::() - .map(Prop::NDTime) - .map_err(|_| Error), - PropType::DTime => v - .parse::>() - .map(Prop::DTime) - .map_err(|_| Error), + PropType::Str => Some(Prop::Str(v)), + PropType::U8 => v.parse::().map(Prop::U8).ok(), + PropType::U16 => v.parse::().map(Prop::U16).ok(), + PropType::I32 => v.parse::().map(Prop::I32).ok(), + PropType::I64 => v.parse::().map(Prop::I64).ok(), + PropType::U32 => v.parse::().map(Prop::U32).ok(), + PropType::U64 => v.parse::().map(Prop::U64).ok(), + PropType::F32 => v.parse::().map(Prop::F32).ok(), + PropType::F64 => v.parse::().map(Prop::F64).ok(), + PropType::Bool => v.parse::().map(Prop::Bool).ok(), + PropType::NDTime => v.parse::().map(Prop::NDTime).ok(), + PropType::DTime => v.parse::>().map(Prop::DTime).ok(), PropType::Decimal { scale } => v .parse::() .map(|v| Prop::Decimal(v.with_scale(scale))) - .map_err(|_| Error), - _ => Err(Error), + .ok(), + _ => None, }, Prop::Bool(v) => match prop_type { - PropType::Str => Ok(Prop::Str(v.to_string().into())), - PropType::U8 => Ok(Prop::U8(if v { 1 } else { 0 })), - PropType::U16 => Ok(Prop::U16(if v { 1 } else { 0 })), - PropType::I32 => Ok(Prop::I32(if v { 1 } else { 0 })), - PropType::I64 => Ok(Prop::I64(if v { 1 } else { 0 })), - PropType::U32 => Ok(Prop::U32(if v { 1 } else { 0 })), - PropType::U64 => Ok(Prop::U64(if v { 1 } else { 0 })), - PropType::F32 => Ok(Prop::F32(if v { 1.0 } else { 0.0 })), - PropType::F64 => Ok(Prop::F64(if v { 1.0 } else { 0.0 })), + PropType::Str => Some(Prop::Str(v.to_string().into())), + PropType::U8 => Some(Prop::U8(v as _)), + PropType::U16 => Some(Prop::U16(v as _)), + PropType::I32 => Some(Prop::I32(v as _)), + PropType::I64 => Some(Prop::I64(v as _)), + PropType::U32 => Some(Prop::U32(v as _)), + PropType::U64 => Some(Prop::U64(v as _)), + PropType::F32 => Some(Prop::F32(if v { 1.0 } else { 0.0 })), + PropType::F64 => Some(Prop::F64(if v { 1.0 } else { 0.0 })), PropType::Bool => unreachable!("Same type case handled above"), PropType::Decimal { scale } => { let val = if v { @@ -360,87 +554,85 @@ impl Prop { } else { BigDecimal::from(0) }; - Ok(Prop::Decimal(val.with_scale(scale))) + Some(Prop::Decimal(val.with_scale(scale))) } - _ => Err(Error), + _ => None, }, - Prop::List(_v) => Err(Error), - Prop::Map(_v) => Err(Error), + Prop::List(_v) => None, + Prop::Map(_v) => None, Prop::NDTime(v) => match prop_type { - PropType::Str => Ok(Prop::Str(v.to_string().into())), - PropType::I64 => Ok(Prop::I64(v.and_utc().timestamp())), + PropType::Str => Some(Prop::Str(v.to_string().into())), + PropType::I64 => Some(Prop::I64(v.and_utc().timestamp())), PropType::U64 => { let ts = v.and_utc().timestamp(); if ts >= 0 { - Ok(Prop::U64(ts as u64)) + Some(Prop::U64(ts as u64)) } else { - Err(Error) + None } } - PropType::DTime => Ok(Prop::DTime(v.and_utc())), + PropType::DTime => Some(Prop::DTime(v.and_utc())), PropType::NDTime => unreachable!("Same type case handled above"), - _ => Err(Error), + _ => None, }, Prop::DTime(v) => match prop_type { - PropType::Str => Ok(Prop::Str(v.to_rfc3339().into())), - PropType::I64 => Ok(Prop::I64(v.timestamp())), + PropType::Str => Some(Prop::Str(v.to_rfc3339().into())), + PropType::I64 => Some(Prop::I64(v.timestamp())), PropType::U64 => { let ts = v.timestamp(); if ts >= 0 { - Ok(Prop::U64(ts as u64)) + Some(Prop::U64(ts as u64)) } else { - Err(Error) + None } } - PropType::NDTime => 
Ok(Prop::NDTime(v.naive_utc())), + PropType::NDTime => Some(Prop::NDTime(v.naive_utc())), PropType::DTime => unreachable!("Same type case handled above"), - _ => Err(Error), + _ => None, }, - #[cfg(feature = "arrow")] - Prop::Array(_v) => Err(Error), Prop::Decimal(v) => match prop_type { - PropType::Str => Ok(Prop::Str(v.to_string().into())), + PropType::Str => Some(Prop::Str(v.to_string().into())), PropType::U8 => { - let as_i64 = v.to_i64().ok_or(Error)?; - u8::from_i64(as_i64).ok_or(Error).map(Prop::U8) + let as_i64 = v.to_i64()?; + u8::from_i64(as_i64).map(Prop::U8) } PropType::U16 => { - let as_i64 = v.to_i64().ok_or(Error)?; - u16::from_i64(as_i64).ok_or(Error).map(Prop::U16) + let as_i64 = v.to_i64()?; + u16::from_i64(as_i64).map(Prop::U16) } PropType::I32 => { - let as_i64 = v.to_i64().ok_or(Error)?; - i32::from_i64(as_i64).ok_or(Error).map(Prop::I32) + let as_i64 = v.to_i64()?; + i32::from_i64(as_i64).map(Prop::I32) } - PropType::I64 => v.to_i64().ok_or(Error).map(Prop::I64), + PropType::I64 => v.to_i64().map(Prop::I64), PropType::U32 => { - let as_i64 = v.to_i64().ok_or(Error)?; - u32::from_i64(as_i64).ok_or(Error).map(Prop::U32) + let as_i64 = v.to_i64()?; + u32::from_i64(as_i64).map(Prop::U32) } PropType::U64 => { - let as_i64 = v.to_i64().ok_or(Error)?; - u64::from_i64(as_i64).ok_or(Error).map(Prop::U64) + let as_i64 = v.to_i64()?; + u64::from_i64(as_i64).map(Prop::U64) } - PropType::F32 => v.to_f32().ok_or(Error).map(Prop::F32), - PropType::F64 => v.to_f64().ok_or(Error).map(Prop::F64), + PropType::F32 => v.to_f32().map(Prop::F32), + PropType::F64 => v.to_f64().map(Prop::F64), PropType::Bool => { - let as_i64 = v.to_i64().ok_or(Error)?; - Ok(Prop::Bool(as_i64 != 0)) + let as_i64 = v.to_i64()?; + Some(Prop::Bool(as_i64 != 0)) } - PropType::Decimal { scale } => Ok(Prop::Decimal(v.with_scale(scale))), - _ => Err(Error), + PropType::Decimal { scale } => Some(Prop::Decimal(v.with_scale(scale))), + _ => None, }, _ => match prop_type { // Numeric conversions using num_traits - PropType::U8 => self.try_into_int::().map(Prop::U8), - PropType::U16 => self.try_into_int::().map(Prop::U16), - PropType::I32 => self.try_into_int::().map(Prop::I32), - PropType::I64 => self.try_into_int::().map(Prop::I64), - PropType::U32 => self.try_into_int::().map(Prop::U32), - PropType::U64 => self.try_into_int::().map(Prop::U64), - PropType::F32 => self.try_into_f32(), - PropType::F64 => self.into_f64().map(Prop::F64), - _ => Err(Error), + PropType::U8 => self.cast_num::().map(Prop::U8), + PropType::U16 => self.cast_num::().map(Prop::U16), + PropType::I32 => self.cast_num::().map(Prop::I32), + PropType::I64 => self.cast_num::().map(Prop::I64), + PropType::U32 => self.cast_num::().map(Prop::U32), + PropType::U64 => self.cast_num::().map(Prop::U64), + PropType::F32 => self.cast_num::().map(Prop::F32), + PropType::F64 => self.cast_num::().map(Prop::F64), + _ => None, }, } } @@ -478,8 +670,8 @@ impl Prop { } pub fn try_from_bd(bd: BigDecimal) -> Result { - let prop = Prop::Decimal(bd); - validate_prop(prop) + validate_bd(&bd)?; + Ok(Prop::Decimal(bd)) } pub fn map(vals: impl IntoIterator, impl Into)>) -> Self { @@ -490,6 +682,13 @@ impl Prop { Prop::Map(h_map.into()) } + pub fn as_map(&self) -> Option> { + match self { + Prop::Map(map) => Some(SerdeArrowMap(map)), + _ => None, + } + } + pub fn dtype(&self) -> PropType { match self { Prop::Str(_) => PropType::Str, @@ -502,26 +701,9 @@ impl Prop { Prop::F32(_) => PropType::F32, Prop::F64(_) => PropType::F64, Prop::Bool(_) => PropType::Bool, - Prop::List(list) 
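// Descriptive notes on the two changes visible around here:
// * try_cast/cast_num now report fallible conversions as Option rather than
//   Result, e.g. Prop::I64(300).try_cast(PropType::U8) yields None (out of
//   range) while Prop::Bool(true).try_cast(PropType::I64) yields Some(Prop::I64(1)).
// * The List arm of dtype() below previously unified the element types of every
//   entry inline (panicking on failure); it now delegates to PropArray::dtype(),
//   and the arrow-feature Array arm is gone because arrays are folded into
//   Prop::List(PropArray::Array(_)).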
=> { - let list_type = list - .iter() - .map(|p| Ok(p.dtype())) - .reduce(|a, b| unify_types(&a?, &b?, &mut false)) - .transpose() - .map(|e| e.unwrap_or(PropType::Empty)) - .unwrap_or_else(|e| panic!("Cannot unify types for list {:?}: {e:?}", list)); - PropType::List(Box::new(list_type)) - } + Prop::List(list) => PropType::List(Box::new(list.dtype())), Prop::Map(map) => PropType::map(map.iter().map(|(k, v)| (k, v.dtype()))), Prop::NDTime(_) => PropType::NDTime, - #[cfg(feature = "arrow")] - Prop::Array(arr) => { - let arrow_dtype = arr - .as_array_ref() - .expect("Should not call dtype on empty PropArray") - .data_type(); - PropType::Array(Box::new(prop_type_from_arrow_dtype(arrow_dtype))) - } Prop::DTime(_) => PropType::DTime, Prop::Decimal(d) => PropType::Decimal { scale: d.as_bigint_and_scale().1, @@ -533,6 +715,12 @@ impl Prop { Prop::Str(s.into()) } + pub fn list, I: IntoIterator>(vals: I) -> Prop { + Prop::List(PropArray::Vec( + vals.into_iter().map_into().collect::>().into(), + )) + } + pub fn add(self, other: Prop) -> Option { match (self, other) { (Prop::U8(a), Prop::U8(b)) => Some(Prop::U8(a + b)), @@ -583,6 +771,44 @@ impl Prop { } } +pub fn list_array_from_props( + dt: &DataType, + props: impl IntoIterator>, +) -> Result { + use arrow_schema::{Field, Fields}; + use serde_arrow::ArrayBuilder; + + let fields: Fields = vec![Field::new("value", dt.clone(), true)].into(); + + let mut builder = ArrayBuilder::from_arrow(&fields)?; + + for value in props { + builder.push(SerdeRow { value })?; + } + + let arrays = builder.to_arrow()?; + + Ok(arrays.first().unwrap().as_list::().clone()) +} + +pub fn struct_array_from_props( + dt: &DataType, + props: impl IntoIterator>, +) -> Result { + use serde_arrow::ArrayBuilder; + + let fields = [FieldRef::new(Field::new("value", dt.clone(), true))]; + + let mut builder = ArrayBuilder::from_arrow(&fields)?; + + for p in props { + builder.push(SerdeRow { value: p })? 
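        // Like `list_array_from_props` above, this helper funnels each property
        // through a single nullable "value" field (`SerdeRow { value }`), pushes
        // it into `serde_arrow::ArrayBuilder`, and returns the first (and only)
        // column of the built batch.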
+ } + + let arrays = builder.to_arrow()?; + Ok(arrays.first().unwrap().as_struct().clone()) +} + impl Display for Prop { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { @@ -598,8 +824,6 @@ impl Display for Prop { Prop::Bool(value) => write!(f, "{}", value), Prop::DTime(value) => write!(f, "{}", value), Prop::NDTime(value) => write!(f, "{}", value), - #[cfg(feature = "arrow")] - Prop::Array(value) => write!(f, "{:?}", value), Prop::List(value) => { write!( f, @@ -643,111 +867,15 @@ impl Display for Prop { } } -impl From for Prop { - fn from(value: ArcStr) -> Self { - Prop::Str(value) - } -} - -impl From<&ArcStr> for Prop { - fn from(value: &ArcStr) -> Self { - Prop::Str(value.clone()) - } -} - -impl From for Prop { - fn from(value: String) -> Self { - Prop::Str(value.into()) - } -} - -impl From<&String> for Prop { - fn from(s: &String) -> Self { - Prop::Str(s.as_str().into()) - } -} - -impl From> for Prop { - fn from(s: Arc) -> Self { - Prop::Str(s.into()) - } -} - -impl From<&Arc> for Prop { - fn from(value: &Arc) -> Self { - Prop::Str(value.clone().into()) - } -} - impl From<&str> for Prop { fn from(s: &str) -> Self { - Prop::Str(s.to_owned().into()) - } -} - -impl From for Prop { - fn from(i: i32) -> Self { - Prop::I32(i) - } -} - -impl From for Prop { - fn from(i: u8) -> Self { - Prop::U8(i) - } -} - -impl From for Prop { - fn from(i: u16) -> Self { - Prop::U16(i) - } -} - -impl From for Prop { - fn from(i: i64) -> Self { - Prop::I64(i) - } -} - -impl From for Prop { - fn from(d: BigDecimal) -> Self { - Prop::Decimal(d) - } -} - -impl From for Prop { - fn from(u: u32) -> Self { - Prop::U32(u) - } -} - -impl From for Prop { - fn from(u: u64) -> Self { - Prop::U64(u) - } -} - -impl From for Prop { - fn from(f: f32) -> Self { - Prop::F32(f) - } -} - -impl From for Prop { - fn from(f: f64) -> Self { - Prop::F64(f) - } -} - -impl From> for Prop { - fn from(f: DateTime) -> Self { - Prop::DTime(f) + Prop::Str(s.into()) } } -impl From for Prop { - fn from(b: bool) -> Self { - Prop::Bool(b) +impl From for Prop { + fn from(s: String) -> Self { + Prop::Str(s.into()) } } @@ -765,7 +893,7 @@ impl From> for Prop { impl From> for Prop { fn from(value: Vec) -> Self { - Prop::List(Arc::new(value)) + Prop::List(value.into()) } } @@ -775,6 +903,12 @@ impl From<&Prop> for Prop { } } +impl From for Prop { + fn from(value: ArrayRef) -> Self { + Prop::List(PropArray::from(value)) + } +} + pub trait IntoPropMap { fn into_prop_map(self) -> Prop; } @@ -795,7 +929,8 @@ pub trait IntoPropList { impl, K: Into> IntoPropList for I { fn into_prop_list(self) -> Prop { - Prop::List(Arc::new(self.into_iter().map(|v| v.into()).collect())) + let vec = self.into_iter().map(|v| v.into()).collect::>(); + Prop::List(vec.into()) } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs new file mode 100644 index 0000000000..549f81b471 --- /dev/null +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -0,0 +1,239 @@ +use crate::core::{ + entities::properties::prop::{ + prop_col::{MapCol, PropCol}, + validate_bd, ArrowRow, InvalidBigDecimal, Prop, PropArray, PropUnwrap, SerdeArrowList, + SerdeArrowMap, + }, + storage::arc_str::ArcStr, +}; +use bigdecimal::BigDecimal; +use chrono::{DateTime, NaiveDateTime, Utc}; +use num_traits::ToPrimitive; +use rustc_hash::FxHashMap; +use serde::Serialize; +use std::{borrow::Cow, sync::Arc}; + +#[derive(Debug, Clone)] +pub enum PropRef<'a> { + Str(&'a str), + Num(PropNum), 
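    // `PropRef` is a borrowed, mostly stack-only view of `Prop`: `Num` collapses
    // every integer/float width into the Copy-able `PropNum`, while `Decimal`
    // (below) carries the i128 mantissa plus scale instead of a heap-allocated
    // BigDecimal.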
+ Bool(bool), + List(Cow<'a, PropArray>), + Map(PropMapRef<'a>), + NDTime(NaiveDateTime), + DTime(DateTime), + Decimal { num: i128, scale: i8 }, +} + +impl PropRef<'_> { + pub fn as_map_ref(&self) -> Option> { + if let PropRef::Map(m) = self { + Some(*m) + } else { + None + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum PropMapRef<'a> { + Mem(&'a Arc>), + PropCol { map: &'a MapCol, i: usize }, + Arrow(ArrowRow<'a>), +} + +impl<'a> PropMapRef<'a> { + pub fn into_prop(self) -> Option { + match self { + PropMapRef::Mem(map) => Some(Prop::Map(map.clone())), + PropMapRef::PropCol { map, i } => map.get(i), + PropMapRef::Arrow(row) => row.into_prop(), + } + } + + pub fn as_map(&self) -> Option<&'a Arc>> { + if let PropMapRef::Mem(m) = self { + Some(*m) + } else { + None + } + } + + pub fn as_mem(&self) -> Arc> { + match self { + PropMapRef::Mem(m) => (*m).clone(), + PropMapRef::PropCol { map, i } => map.get(*i).unwrap_map(), + PropMapRef::Arrow(row) => row.into_prop().unwrap_map(), + } + } +} + +impl> From for PropRef<'static> { + fn from(n: T) -> Self { + PropRef::Num(n.into()) + } +} + +impl<'a> From for PropRef<'a> { + fn from(b: bool) -> Self { + PropRef::Bool(b) + } +} + +impl<'a> From<&'a str> for PropRef<'a> { + fn from(s: &'a str) -> Self { + PropRef::Str(s) + } +} + +impl From for PropRef<'_> { + fn from(dt: NaiveDateTime) -> Self { + PropRef::NDTime(dt) + } +} + +impl From> for PropRef<'_> { + fn from(dt: DateTime) -> Self { + PropRef::DTime(dt) + } +} + +impl<'a> From<&'a BigDecimal> for PropRef<'a> { + fn from(decimal: &'a BigDecimal) -> Self { + let (num, scale) = decimal.as_bigint_and_exponent(); + let num = num.to_i128().unwrap_or_else(|| { + panic!( + "BigDecimal value {} is out of range for i128 representation", + decimal + ) + }); + PropRef::Decimal { + num, + scale: scale as i8, + } + } +} + +impl<'a> From> for PropRef<'a> { + fn from(row: ArrowRow<'a>) -> Self { + PropRef::Map(PropMapRef::Arrow(row)) + } +} + +impl<'a> From<&'a Arc>> for PropRef<'a> { + fn from(map: &'a Arc>) -> Self { + PropRef::Map(PropMapRef::Mem(map)) + } +} + +#[derive(Debug, PartialEq, Clone, Copy, derive_more::From)] +pub enum PropNum { + U8(u8), + U16(u16), + I32(i32), + I64(i64), + U32(u32), + U64(u64), + F32(f32), + F64(f64), +} + +/// A trait for types that can be cheaply viewed as a [`PropRef`]. 
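/// A minimal usage sketch (illustrative only; the import path is assumed from
/// this file's `crate::core::entities::properties::prop` imports):
///
/// ```ignore
/// use raphtory_api::core::entities::properties::prop::{AsPropRef, Prop, PropRef};
///
/// let prop = Prop::I64(42);
/// // Borrow a view of the value without cloning any heap data.
/// match prop.as_prop_ref() {
///     PropRef::Num(n) => println!("numeric: {n:?}"),
///     other => println!("other: {other:?}"),
/// }
/// ```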
+pub trait AsPropRef { + fn as_prop_ref(&self) -> PropRef<'_>; +} + +impl<'a> AsPropRef for PropRef<'a> { + #[inline] + fn as_prop_ref(&self) -> PropRef<'_> { + self.clone() + } +} + +impl AsPropRef for Prop { + fn as_prop_ref(&self) -> PropRef<'_> { + match self { + Prop::Str(s) => PropRef::Str(s), + Prop::U8(v) => PropRef::Num(PropNum::U8(*v)), + Prop::U16(v) => PropRef::Num(PropNum::U16(*v)), + Prop::I32(v) => PropRef::Num(PropNum::I32(*v)), + Prop::I64(v) => PropRef::Num(PropNum::I64(*v)), + Prop::U32(v) => PropRef::Num(PropNum::U32(*v)), + Prop::U64(v) => PropRef::Num(PropNum::U64(*v)), + Prop::F32(v) => PropRef::Num(PropNum::F32(*v)), + Prop::F64(v) => PropRef::Num(PropNum::F64(*v)), + Prop::Bool(b) => PropRef::Bool(*b), + Prop::List(lst) => PropRef::List(std::borrow::Cow::Borrowed(lst)), + Prop::Map(map) => PropRef::Map(PropMapRef::Mem(map)), + Prop::NDTime(dt) => PropRef::NDTime(*dt), + Prop::DTime(dt) => PropRef::DTime(*dt), + Prop::Decimal(bd) => PropRef::from(bd), + } + } +} + +impl<'a> PropRef<'a> { + pub fn as_str(&self) -> Option<&'a str> { + if let PropRef::Str(s) = self { + Some(s) + } else { + None + } + } + + pub fn try_from_bd(bd: BigDecimal) -> Result { + validate_bd(&bd)?; + let (num, scale) = bd.as_bigint_and_exponent(); + let num = num.to_i128().unwrap(); + Ok(PropRef::Decimal { + num, + scale: scale as i8, + }) + } +} + +impl<'a> Serialize for PropMapRef<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + PropMapRef::Mem(map) => SerdeArrowMap(map).serialize(serializer), + PropMapRef::PropCol { map, i } => match map.get_ref(*i) { + Some(prop) => prop.serialize(serializer), + None => serializer.serialize_none(), + }, + PropMapRef::Arrow(row) => row.serialize(serializer), + } + } +} + +impl<'a> Serialize for PropRef<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + PropRef::Str(s) => serializer.serialize_str(s), + PropRef::Num(n) => match n { + PropNum::U8(v) => serializer.serialize_u8(*v), + PropNum::U16(v) => serializer.serialize_u16(*v), + PropNum::I32(v) => serializer.serialize_i32(*v), + PropNum::I64(v) => serializer.serialize_i64(*v), + PropNum::U32(v) => serializer.serialize_u32(*v), + PropNum::U64(v) => serializer.serialize_u64(*v), + PropNum::F32(v) => serializer.serialize_f32(*v), + PropNum::F64(v) => serializer.serialize_f64(*v), + }, + PropRef::Bool(b) => serializer.serialize_bool(*b), + PropRef::List(lst) => SerdeArrowList(lst).serialize(serializer), + PropRef::Map(map_ref) => map_ref.serialize(serializer), + PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), + PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), + PropRef::Decimal { num, scale } => { + let decimal = BigDecimal::new((*num).into(), (*scale).into()); + decimal.serialize(serializer) + } + } + } +} diff --git a/raphtory-api/src/core/entities/properties/prop/prop_type.rs b/raphtory-api/src/core/entities/properties/prop/prop_type.rs index 84e7299883..1807084aa5 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_type.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_type.rs @@ -1,4 +1,3 @@ -#[cfg(any(feature = "arrow", feature = "storage", feature = "python"))] use arrow_schema::DataType; use serde::{Deserialize, Serialize}; use std::{ @@ -40,7 +39,6 @@ pub enum PropType { Map(Arc>), NDTime, DTime, - Array(Box), Decimal { scale: i64, }, @@ -71,7 +69,6 @@ impl Display for PropType { } PropType::NDTime => "NDTime", 
PropType::DTime => "DTime", - PropType::Array(p_type) => return write!(f, "Array<{}>", p_type), PropType::Decimal { scale } => return write!(f, "Decimal({})", scale), }; @@ -142,9 +139,27 @@ impl PropType { } None } + + // This is the best guess for the size of one row of properties + pub fn est_size(&self) -> usize { + const CONTAINER_SIZE: usize = 64; + match self { + PropType::Str => CONTAINER_SIZE, + PropType::U8 | PropType::Bool => 1, + PropType::U16 => 2, + PropType::I32 | PropType::F32 | PropType::U32 => 4, + PropType::I64 | PropType::F64 | PropType::U64 => 8, + PropType::NDTime | PropType::DTime => 8, + PropType::List(p_type) => p_type.est_size() * CONTAINER_SIZE, + PropType::Map(p_map) => { + p_map.values().map(|v| v.est_size()).sum::() * CONTAINER_SIZE + } + PropType::Decimal { .. } => 16, + PropType::Empty => 0, + } + } } -#[cfg(any(feature = "arrow", feature = "storage", feature = "python"))] pub fn data_type_as_prop_type(dt: &DataType) -> Result { match dt { DataType::Boolean => Ok(PropType::Bool), @@ -187,22 +202,18 @@ pub fn data_type_as_prop_type(dt: &DataType) -> Result for PropType { fn from(value: &DataType) -> Self { match value { - DataType::Utf8 => PropType::Str, - DataType::LargeUtf8 => PropType::Str, - DataType::Utf8View => PropType::Str, + DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => PropType::Str, DataType::UInt8 => PropType::U8, DataType::UInt16 => PropType::U16, DataType::Int32 => PropType::I32, @@ -215,8 +226,21 @@ mod arrow { scale: *scale as i64, }, DataType::Boolean => PropType::Bool, - - _ => PropType::Empty, + DataType::Timestamp(TimeUnit::Millisecond, None) => PropType::NDTime, + DataType::Timestamp(TimeUnit::Millisecond, tz) if tz.as_deref() == Some("UTC") => { + PropType::DTime + } + DataType::Struct(fields) => PropType::map( + fields + .iter() + .filter(|field| field.name() != EMPTY_MAP_FIELD_NAME) + .map(|f| (f.name().to_string(), PropType::from(f.data_type()))), + ), + DataType::List(field) | DataType::LargeList(field) => { + PropType::List(Box::new(PropType::from(field.data_type()))) + } + DataType::Null => PropType::Empty, + dtype => panic!("unsupported type {dtype:?}"), } } } @@ -250,9 +274,6 @@ pub fn unify_types(l: &PropType, r: &PropType, unified: &mut bool) -> Result { unify_types(l_type, r_type, unified).map(|t| PropType::List(Box::new(t))) } - (PropType::Array(l_type), PropType::Array(r_type)) => { - unify_types(l_type, r_type, unified).map(|t| PropType::Array(Box::new(t))) - } (PropType::Map(l_map), PropType::Map(r_map)) => { // maps need to be merged and only overlapping keys need to be unified @@ -287,6 +308,64 @@ pub fn unify_types(l: &PropType, r: &PropType, unified: &mut bool) -> Result Option { + match (l, r) { + (PropType::Empty, _) => Some(true), + (_, PropType::Empty) => Some(true), + (PropType::Str, PropType::Str) => None, + (PropType::U8, PropType::U8) => None, + (PropType::U16, PropType::U16) => None, + (PropType::I32, PropType::I32) => None, + (PropType::I64, PropType::I64) => None, + (PropType::U32, PropType::U32) => None, + (PropType::U64, PropType::U64) => None, + (PropType::F32, PropType::F32) => None, + (PropType::F64, PropType::F64) => None, + (PropType::Bool, PropType::Bool) => None, + (PropType::NDTime, PropType::NDTime) => None, + (PropType::DTime, PropType::DTime) => None, + (PropType::List(l_type), PropType::List(r_type)) => check_for_unification(l_type, r_type), + (PropType::Map(l_map), PropType::Map(r_map)) => { + let keys_check = l_map + .keys() + .any(|k| !r_map.contains_key(k)) + .then_some(true) 
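            // Tri-state result as used by this function: None = the two types are
            // already identical, Some(true) = unifiable but one side must widen
            // (e.g. Empty against anything, or map key sets that differ, as the
            // keys check here detects), Some(false) = incompatible.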
+ .or_else(|| r_map.keys().any(|k| !l_map.contains_key(k)).then_some(true)); + + // check for unification of the values + let inner_checks = l_map + .iter() + .filter_map(|(l_key, l_d_type)| { + r_map + .get(l_key) + .and_then(|r_d_type| check_for_unification(r_d_type, l_d_type)) + }) + .chain(r_map.iter().filter_map(|(r_key, r_d_type)| { + l_map + .get(r_key) + .and_then(|l_d_type| check_for_unification(r_d_type, l_d_type)) + })); + for check in inner_checks { + if check { + return Some(true); + } + } + keys_check + } + (PropType::Decimal { scale: l_scale }, PropType::Decimal { scale: r_scale }) + if l_scale == r_scale => + { + None + } + _ => Some(false), + } +} + #[cfg(test)] mod test { use super::*; @@ -397,15 +476,15 @@ mod test { ); assert!(unify); - let l = PropType::Array(Box::new(PropType::map([("a".to_string(), PropType::U8)]))); - let r = PropType::Array(Box::new(PropType::map([ + let l = PropType::List(Box::new(PropType::map([("a".to_string(), PropType::U8)]))); + let r = PropType::List(Box::new(PropType::map([ ("a".to_string(), PropType::Empty), ("b".to_string(), PropType::Str), ]))); let mut unify = false; assert_eq!( unify_types(&l, &r, &mut unify), - Ok(PropType::Array(Box::new(PropType::map([ + Ok(PropType::List(Box::new(PropType::map([ ("a".to_string(), PropType::U8), ("b".to_string(), PropType::Str) ])))) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs b/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs index f9e29bc1b5..133d12b3f7 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs @@ -1,4 +1,7 @@ -use crate::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; +use crate::core::{ + entities::properties::prop::{Prop, PropArray}, + storage::arc_str::ArcStr, +}; use bigdecimal::BigDecimal; use chrono::NaiveDateTime; use rustc_hash::FxHashMap; @@ -55,8 +58,8 @@ pub trait PropUnwrap: Sized { self.into_bool().unwrap() } - fn into_list(self) -> Option>>; - fn unwrap_list(self) -> Arc> { + fn into_list(self) -> Option; + fn unwrap_list(self) -> PropArray { self.into_list().unwrap() } @@ -116,7 +119,7 @@ impl PropUnwrap for Option
{ self.and_then(|p| p.into_bool()) } - fn into_list(self) -> Option>> { + fn into_list(self) -> Option { self.and_then(|p| p.into_list()) } @@ -218,7 +221,7 @@ impl PropUnwrap for Prop { } } - fn into_list(self) -> Option>> { + fn into_list(self) -> Option { if let Prop::List(v) = self { Some(v) } else { diff --git a/raphtory-api/src/core/entities/properties/prop/serde.rs b/raphtory-api/src/core/entities/properties/prop/serde.rs index 56b35b2679..fd33605a90 100644 --- a/raphtory-api/src/core/entities/properties/prop/serde.rs +++ b/raphtory-api/src/core/entities/properties/prop/serde.rs @@ -14,7 +14,7 @@ impl TryFrom for Prop { .map(|num| num.into()) .or_else(|| value.as_f64().map(|num| num.into())) .ok_or(format!("Number conversion error for: {}", value)), - Value::String(value) => Ok(value.into()), + Value::String(value) => Ok(value.as_str().into()), Value::Array(value) => value .into_iter() .map(|item| item.try_into()) @@ -49,7 +49,7 @@ impl From for Value { .map(Value::Number) .unwrap_or(Value::Null), Prop::Bool(value) => Value::Bool(value), - Prop::List(values) => Value::Array(values.iter().cloned().map(Value::from).collect()), + Prop::List(values) => Value::Array(values.iter().map(Value::from).collect()), Prop::Map(map) => { let json_map: serde_json::Map = map .iter() diff --git a/raphtory-api/src/core/entities/properties/prop/template.rs b/raphtory-api/src/core/entities/properties/prop/template.rs index 21f55ed2e5..12209991e1 100644 --- a/raphtory-api/src/core/entities/properties/prop/template.rs +++ b/raphtory-api/src/core/entities/properties/prop/template.rs @@ -17,9 +17,7 @@ impl From for Value { Prop::Str(value) => Value::from(value.0.to_owned()), Prop::DTime(value) => Value::from(value.timestamp_millis()), Prop::NDTime(value) => Value::from(value.and_utc().timestamp_millis()), - #[cfg(feature = "arrow")] - Prop::Array(value) => Value::from(value.to_vec_u8()), - Prop::List(value) => value.iter().cloned().collect(), + Prop::List(value) => value.iter().collect(), Prop::Map(value) => value .iter() .map(|(key, value)| (key.to_string(), value.clone())) diff --git a/raphtory-api/src/core/entities/properties/tprop.rs b/raphtory-api/src/core/entities/properties/tprop.rs index f5510f1885..bcf4c2e3c1 100644 --- a/raphtory-api/src/core/entities/properties/tprop.rs +++ b/raphtory-api/src/core/entities/properties/tprop.rs @@ -15,24 +15,46 @@ pub trait TPropOps<'a>: Clone + Send + Sync + Sized + 'a { } fn last_before(&self, t: EventTime) -> Option<(EventTime, Prop)> { - self.clone().iter_window(EventTime::MIN..t).next_back() + self.clone().iter_inner_rev(Some(EventTime::MIN..t)).next() } - fn iter(self) -> impl DoubleEndedIterator + Send + Sync + 'a; + fn iter_inner( + self, + range: Option>, + ) -> impl Iterator + Send + Sync + 'a; - fn iter_t(self) -> impl DoubleEndedIterator + Send + Sync + 'a { - self.iter().map(|(t, v)| (t.t(), v)) + fn iter_inner_rev( + self, + range: Option>, + ) -> impl Iterator + Send + Sync + 'a; + + fn iter(self) -> impl Iterator + Send + Sync + 'a { + self.iter_inner(None) + } + + fn iter_rev(self) -> impl Iterator + Send + Sync + 'a { + self.iter_inner_rev(None) } fn iter_window( self, r: Range, - ) -> impl DoubleEndedIterator + Send + Sync + 'a; + ) -> impl Iterator + Send + Sync + 'a { + self.iter_inner(Some(r)) + } - fn iter_window_t( + fn iter_window_rev( self, - r: Range, - ) -> impl DoubleEndedIterator + Send + Sync + 'a { + r: Range, + ) -> impl Iterator + Send + Sync + 'a { + self.iter_inner_rev(Some(r)) + } + + fn iter_t(self) -> impl Iterator + Send + 
Sync + 'a { + self.iter().map(|(t, v)| (t.t(), v)) + } + + fn iter_window_t(self, r: Range) -> impl Iterator + Send + Sync + 'a { self.iter_window(EventTime::range(r)) .map(|(t, v)| (t.t(), v)) } @@ -40,7 +62,7 @@ pub trait TPropOps<'a>: Clone + Send + Sync + Sized + 'a { fn iter_window_te( self, r: Range, - ) -> impl DoubleEndedIterator + Send + Sync + 'a { + ) -> impl Iterator + Send + Sync + 'a { self.iter_window(r).map(|(t, v)| (t.t(), v)) } diff --git a/raphtory-api/src/core/storage/dict_mapper.rs b/raphtory-api/src/core/storage/dict_mapper.rs index d8f37b0e5e..0cc0c6c57b 100644 --- a/raphtory-api/src/core/storage/dict_mapper.rs +++ b/raphtory-api/src/core/storage/dict_mapper.rs @@ -1,17 +1,23 @@ -use crate::core::storage::{arc_str::ArcStr, locked_vec::ArcReadLockedVec, FxDashMap}; -use dashmap::mapref::entry::Entry; -use parking_lot::RwLock; +use crate::core::{ + entities::properties::meta::STATIC_GRAPH_LAYER, + storage::{arc_str::ArcStr, ArcRwLockReadGuard}, +}; +use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use std::{ borrow::{Borrow, BorrowMut}, + collections::hash_map::Entry, hash::Hash, + ops::{Deref, DerefMut}, sync::Arc, }; -#[derive(Serialize, Deserialize, Default, Debug)] +#[derive(Serialize, Deserialize, Default, Debug, Clone)] pub struct DictMapper { - map: FxDashMap, - reverse_map: Arc>>, //FIXME: a boxcar vector would be a great fit if it was serializable... + map: Arc>>, + reverse_map: Arc>>, + num_private_fields: usize, } #[derive(Copy, Clone, Debug)] @@ -31,6 +37,11 @@ where } impl MaybeNew { + #[inline] + pub fn is_new(&self) -> bool { + matches!(self, MaybeNew::New(_)) + } + #[inline] pub fn inner(self) -> Index { match self { @@ -81,6 +92,13 @@ impl MaybeNew { MaybeNew::Existing(_) => None, } } + + pub fn into_inner_with_status(self) -> (Index, bool) { + match self { + MaybeNew::New(inner) => (inner, true), + MaybeNew::Existing(inner) => (inner, false), + } + } } impl Borrow for MaybeNew { @@ -97,33 +115,144 @@ impl BorrowMut for MaybeNew { } } +pub struct LockedDictMapper<'a> { + map: RwLockReadGuard<'a, FxHashMap>, + reverse_map: RwLockReadGuard<'a, Vec>, + num_private_fields: usize, +} + +pub struct WriteLockedDictMapper<'a> { + map: RwLockWriteGuard<'a, FxHashMap>, + reverse_map: RwLockWriteGuard<'a, Vec>, +} + +impl LockedDictMapper<'_> { + pub fn get_id(&self, name: &str) -> Option { + self.map.get(name).copied() + } + + pub fn map(&self) -> &FxHashMap { + &self.map + } + + pub fn iter_ids(&self) -> impl Iterator + '_ { + self.reverse_map + .iter() + .enumerate() + .skip(self.num_private_fields) + } +} + +impl WriteLockedDictMapper<'_> { + pub fn get_or_create_id(&mut self, name: &Q) -> MaybeNew + where + Q: Hash + Eq + ?Sized + ToOwned + Borrow, + T: Into, + { + let name = name.to_owned().into(); + let new_id = match self.map.entry(name.clone()) { + Entry::Occupied(entry) => MaybeNew::Existing(*entry.get()), + Entry::Vacant(entry) => { + let id = self.reverse_map.len(); + self.reverse_map.push(name); + entry.insert(id); + MaybeNew::New(id) + } + }; + new_id + } + + pub fn set_id(&mut self, name: impl Into, id: usize) { + let arc_name = name.into(); + let map_entry = self.map.entry(arc_name.clone()); + let keys = self.reverse_map.deref_mut(); + if keys.len() <= id { + keys.resize(id + 1, Default::default()) + } + keys[id] = arc_name; + map_entry.insert_entry(id); + } + + pub fn map(&self) -> &FxHashMap { + &self.map + } +} + impl DictMapper { + fn read_lock_reverse_map(&self) -> 
RwLockReadGuard<'_, Vec> { + self.reverse_map.read_recursive() + } + + fn write_lock_reverse_map(&self) -> RwLockWriteGuard<'_, Vec> { + self.reverse_map.write() + } + + fn read_arc_lock_reverse_map(&self) -> ArcRwLockReadGuard> { + self.reverse_map.read_arc_recursive() + } + + pub fn new_layer_mapper() -> Self { + Self::new_with_private_fields([STATIC_GRAPH_LAYER]) + } + + pub fn new_with_private_fields(fields: impl IntoIterator>) -> Self { + let fields: Vec<_> = fields.into_iter().map(|s| s.into()).collect(); + let num_private_fields = fields.len(); + DictMapper { + map: Arc::new(Default::default()), + reverse_map: Arc::new(RwLock::new(fields)), + num_private_fields, + } + } pub fn contains(&self, key: &str) -> bool { - self.map.contains_key(key) + self.map.read_recursive().contains_key(key) } pub fn deep_clone(&self) -> Self { - let reverse_map = self.reverse_map.read_recursive().clone(); + let map = self.map.read_recursive().clone(); + let reverse_map = self.read_lock_reverse_map().clone(); Self { - map: self.map.clone(), + map: Arc::new(RwLock::new(map)), reverse_map: Arc::new(RwLock::new(reverse_map)), + num_private_fields: self.num_private_fields, } } + + pub fn read(&self) -> LockedDictMapper<'_> { + LockedDictMapper { + map: self.map.read_recursive(), + reverse_map: self.read_lock_reverse_map(), + num_private_fields: self.num_private_fields, + } + } + + pub fn write(&self) -> WriteLockedDictMapper<'_> { + WriteLockedDictMapper { + map: self.map.write(), + reverse_map: self.write_lock_reverse_map(), + } + } + pub fn get_or_create_id(&self, name: &Q) -> MaybeNew where Q: Hash + Eq + ?Sized + ToOwned + Borrow, T: Into, { - if let Some(existing_id) = self.map.get(name.borrow()) { + let map = self.map.read_recursive(); + + if let Some(existing_id) = map.get(name.borrow()) { return MaybeNew::Existing(*existing_id); } + drop(map); + + let mut map = self.map.write(); let name = name.to_owned().into(); - let new_id = match self.map.entry(name.clone()) { + let new_id = match map.entry(name.clone()) { Entry::Occupied(entry) => MaybeNew::Existing(*entry.get()), Entry::Vacant(entry) => { - let mut reverse = self.reverse_map.write(); + let mut reverse = self.write_lock_reverse_map(); let id = reverse.len(); reverse.push(name); entry.insert(id); @@ -134,57 +263,164 @@ impl DictMapper { } pub fn get_id(&self, name: &str) -> Option { - self.map.get(name).map(|id| *id) + self.map.read_recursive().get(name).copied() } /// Explicitly set the id for a key (useful for initialising the map in parallel) pub fn set_id(&self, name: impl Into, id: usize) { + let mut map = self.map.write(); let arc_name = name.into(); - let map_entry = self.map.entry(arc_name.clone()); - let mut keys = self.reverse_map.write(); + let map_entry = map.entry(arc_name.clone()); + let mut keys = self.write_lock_reverse_map(); if keys.len() <= id { keys.resize(id + 1, Default::default()) } keys[id] = arc_name; - map_entry.insert(id); + map_entry.insert_entry(id); } - pub fn has_name(&self, id: usize) -> bool { - let guard = self.reverse_map.read_recursive(); + pub fn has_id(&self, id: usize) -> bool { + let guard = self.read_lock_reverse_map(); guard.get(id).is_some() } pub fn get_name(&self, id: usize) -> ArcStr { - let guard = self.reverse_map.read_recursive(); + let guard = self.read_lock_reverse_map(); guard .get(id) .cloned() .expect("internal ids should always be mapped to a name") } - pub fn get_keys(&self) -> ArcReadLockedVec { - ArcReadLockedVec { - guard: self.reverse_map.read_arc_recursive(), + /// Public ids + pub fn 
ids(&self) -> impl Iterator { + self.num_private_fields..self.num_all_fields() + } + + /// All ids, including private fields + pub fn all_ids(&self) -> impl Iterator { + 0..self.num_all_fields() + } + + /// Public keys + pub fn keys(&self) -> PublicKeys { + PublicKeys { + guard: self.read_arc_lock_reverse_map(), + num_private_fields: self.num_private_fields, + } + } + + /// All keys including private fields + pub fn all_keys(&self) -> AllKeys { + AllKeys { + guard: self.read_arc_lock_reverse_map(), } } - pub fn get_values(&self) -> Vec { - self.map.iter().map(|entry| *entry.value()).collect() + pub fn num_all_fields(&self) -> usize { + self.read_lock_reverse_map().len() + } + + pub fn num_fields(&self) -> usize { + self.map.read_recursive().len() + } + + pub fn num_private_fields(&self) -> usize { + self.num_private_fields + } +} + +#[derive(Debug)] +pub struct AllKeys { + pub(crate) guard: ArcRwLockReadGuard>, +} + +impl Deref for AllKeys { + type Target = [T]; + + #[inline] + fn deref(&self) -> &Self::Target { + self.guard.deref().deref() + } +} + +impl IntoIterator for AllKeys { + type Item = T; + type IntoIter = LockedIter; + + fn into_iter(self) -> Self::IntoIter { + let guard = self.guard; + let len = guard.len(); + let pos = 0; + LockedIter { guard, pos, len } + } +} + +pub struct PublicKeys { + guard: ArcRwLockReadGuard>, + num_private_fields: usize, +} + +impl PublicKeys { + fn items(&self) -> &[T] { + &self.guard[self.num_private_fields..] + } + pub fn iter(&self) -> impl Iterator + '_ { + self.items().iter() } pub fn len(&self) -> usize { - self.reverse_map.read_recursive().len() + self.items().len() } pub fn is_empty(&self) -> bool { - self.reverse_map.read_recursive().is_empty() + self.items().is_empty() + } +} + +impl IntoIterator for PublicKeys { + type Item = T; + type IntoIter = LockedIter; + + fn into_iter(self) -> Self::IntoIter { + let guard = self.guard; + let len = guard.len(); + let pos = self.num_private_fields; + LockedIter { guard, pos, len } + } +} + +pub struct LockedIter { + guard: ArcRwLockReadGuard>, + pos: usize, + len: usize, +} + +impl Iterator for LockedIter { + type Item = T; + + fn next(&mut self) -> Option { + if self.pos < self.len { + let next_val = Some(self.guard[self.pos].clone()); + self.pos += 1; + next_val + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len - self.pos; + (len, Some(len)) } } +impl ExactSizeIterator for LockedIter {} + #[cfg(test)] mod test { use crate::core::storage::dict_mapper::DictMapper; - use proptest::{arbitrary::any, prop_assert, proptest}; + use proptest::prelude::*; use rand::seq::SliceRandom; use rayon::prelude::*; use std::collections::HashMap; @@ -199,9 +435,39 @@ mod test { assert_eq!(mapper.get_or_create_id("test").inner(), 0); } + #[test] + fn test_dict_mapper_deep_clone() { + let mapper = DictMapper::new_with_private_fields(["_private"]); + let alpha_id = mapper.get_or_create_id("alpha").inner(); + let beta_id = mapper.get_or_create_id("beta").inner(); + + let cloned = mapper.deep_clone(); + + assert_eq!(cloned.num_private_fields(), mapper.num_private_fields()); + assert_eq!(cloned.get_id("alpha"), Some(alpha_id)); + assert_eq!(cloned.get_id("beta"), Some(beta_id)); + assert_eq!(cloned.get_name(alpha_id).as_ref(), "alpha"); + assert_eq!(cloned.get_name(beta_id).as_ref(), "beta"); + assert_eq!(cloned.num_fields(), mapper.num_fields()); + assert_eq!( + cloned.all_keys().into_iter().collect::>(), + mapper.all_keys().into_iter().collect::>() + ); + + let gamma_id = 
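        // After deep_clone the two mappers share no state: each hands out the
        // next free id (3) independently, as the gamma/delta assertions below show.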
cloned.get_or_create_id("gamma").inner(); + assert_eq!(gamma_id, 3); + assert!(cloned.contains("gamma")); + assert!(!mapper.contains("gamma")); + + let delta_id = mapper.get_or_create_id("delta").inner(); + assert_eq!(delta_id, 3); + assert!(mapper.contains("delta")); + assert!(!cloned.contains("delta")); + } + #[test] fn check_dict_mapper_concurrent_write() { - proptest!(|(write in any::>())| { + proptest!(|(write: Vec)| { let n = 100; let mapper: DictMapper = DictMapper::default(); @@ -210,7 +476,7 @@ mod test { .into_par_iter() .map(|_| { let mut ids: HashMap = Default::default(); - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut write_s = write.clone(); write_s.shuffle(&mut rng); for s in write_s { @@ -223,8 +489,8 @@ mod test { // check that all maps are the same and that all strings have been assigned an id let res_0 = &res[0]; - prop_assert!(res[1..n].iter().all(|v| res_0 == v) && write.iter().all(|v| mapper.get_id(v).is_some())) - }) + prop_assert!(res[1..n].iter().all(|v| res_0 == v) && write.iter().all(|v| mapper.get_id(v).is_some())); + }); } // map 5 strings to 5 ids from 4 threads concurrently 1000 times diff --git a/raphtory-api/src/core/storage/locked_vec.rs b/raphtory-api/src/core/storage/locked_vec.rs index c15f6fce59..8b13789179 100644 --- a/raphtory-api/src/core/storage/locked_vec.rs +++ b/raphtory-api/src/core/storage/locked_vec.rs @@ -1,55 +1 @@ -use crate::core::storage::ArcRwLockReadGuard; -use std::ops::Deref; -#[derive(Debug)] -pub struct ArcReadLockedVec { - pub(crate) guard: ArcRwLockReadGuard>, -} - -impl Deref for ArcReadLockedVec { - type Target = Vec; - - #[inline] - fn deref(&self) -> &Self::Target { - self.guard.deref() - } -} - -impl IntoIterator for ArcReadLockedVec { - type Item = T; - type IntoIter = LockedIter; - - fn into_iter(self) -> Self::IntoIter { - let guard = self.guard; - let len = guard.len(); - let pos = 0; - LockedIter { guard, pos, len } - } -} - -pub struct LockedIter { - guard: ArcRwLockReadGuard>, - pos: usize, - len: usize, -} - -impl Iterator for LockedIter { - type Item = T; - - fn next(&mut self) -> Option { - if self.pos < self.len { - let next_val = Some(self.guard[self.pos].clone()); - self.pos += 1; - next_val - } else { - None - } - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.len - self.pos; - (remaining, Some(remaining)) - } -} - -impl ExactSizeIterator for LockedIter {} diff --git a/raphtory-api/src/core/storage/mod.rs b/raphtory-api/src/core/storage/mod.rs index c198014d22..ad33155ba7 100644 --- a/raphtory-api/src/core/storage/mod.rs +++ b/raphtory-api/src/core/storage/mod.rs @@ -1,5 +1,5 @@ use dashmap::DashMap; -use rustc_hash::FxHasher; +use rustc_hash::{FxBuildHasher, FxHasher}; use std::hash::BuildHasherDefault; pub mod arc_str; @@ -9,5 +9,6 @@ pub mod sorted_vec_map; pub mod timeindex; pub type FxDashMap = DashMap>; +pub type FxHashMap = std::collections::HashMap; pub type ArcRwLockReadGuard = lock_api::ArcRwLockReadGuard; diff --git a/raphtory-api/src/core/utils/time.rs b/raphtory-api/src/core/utils/time.rs index f81cdc726f..5623cc5f83 100644 --- a/raphtory-api/src/core/utils/time.rs +++ b/raphtory-api/src/core/utils/time.rs @@ -148,6 +148,9 @@ impl TryIntoTimeNeedsEventId for &str {} /// Used to handle automatic injection of event id if not explicitly provided. /// In many cases, we will want different behaviour if an event id was provided or not. 
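/// Illustrative sketch (not part of the original patch; it uses only the
/// variants and derives declared below):
///
/// ```ignore
/// let plain = InputTime::Simple(1_000);
/// let pinned = InputTime::Indexed(1_000, 2);
/// assert_ne!(plain, pinned); // PartialEq/Debug come from the new derive
/// ```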
+#[derive( + Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, serde::Serialize, serde::Deserialize, +)] pub enum InputTime { Simple(i64), Indexed(i64, usize), diff --git a/raphtory-api/src/lib.rs b/raphtory-api/src/lib.rs index e4b29b9e95..8861afe5fe 100644 --- a/raphtory-api/src/lib.rs +++ b/raphtory-api/src/lib.rs @@ -7,7 +7,9 @@ pub mod python; pub mod inherit; pub mod iter; -#[derive(PartialOrd, PartialEq, Debug)] +use serde::{Deserialize, Serialize}; + +#[derive(PartialOrd, PartialEq, Debug, Serialize, Deserialize)] pub enum GraphType { EventGraph, PersistentGraph, diff --git a/raphtory-api/src/python/arcstr.rs b/raphtory-api/src/python/arcstr.rs index 7a736bc5fa..89ee16d2d3 100644 --- a/raphtory-api/src/python/arcstr.rs +++ b/raphtory-api/src/python/arcstr.rs @@ -22,8 +22,9 @@ impl<'py> IntoPyObject<'py> for &ArcStr { } } -impl<'source> FromPyObject<'source> for ArcStr { - fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult { - ob.extract::().map(|v| v.into()) +impl<'py> FromPyObject<'_, 'py> for ArcStr { + type Error = PyErr; + fn extract(obj: Borrowed<'_, 'py, PyAny>) -> Result { + obj.extract::().map(|v| v.into()) } } diff --git a/raphtory-api/src/python/direction.rs b/raphtory-api/src/python/direction.rs index 6f367a85c6..4211eac4f3 100644 --- a/raphtory-api/src/python/direction.rs +++ b/raphtory-api/src/python/direction.rs @@ -1,8 +1,9 @@ use crate::core::Direction; use pyo3::{exceptions::PyTypeError, prelude::*}; -impl<'source> FromPyObject<'source> for Direction { - fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult { +impl<'py> FromPyObject<'_, 'py> for Direction { + type Error = PyErr; + fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { let value: &str = ob.extract()?; match value { "out" => Ok(Direction::OUT), diff --git a/raphtory-api/src/python/gid.rs b/raphtory-api/src/python/gid.rs index 4883aafcbc..6c44aa7c28 100644 --- a/raphtory-api/src/python/gid.rs +++ b/raphtory-api/src/python/gid.rs @@ -28,11 +28,12 @@ impl<'py> IntoPyObject<'py> for &GID { } } -impl<'source> FromPyObject<'source> for GID { - fn extract_bound(id: &Bound<'source, PyAny>) -> PyResult { - id.extract::() +impl<'py> FromPyObject<'_, 'py> for GID { + type Error = PyErr; + fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { + ob.extract::() .map(GID::Str) - .or_else(|_| id.extract::().map(GID::U64)) + .or_else(|_| ob.extract::().map(GID::U64)) .map_err(|_| { let msg = "IDs need to be strings or an unsigned integers"; PyTypeError::new_err(msg) diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index d00acc2f1e..4560904a11 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -3,11 +3,12 @@ use crate::core::{ storage::arc_str::ArcStr, }; use bigdecimal::BigDecimal; +use chrono::{DateTime, NaiveDateTime, Utc}; use pyo3::{ exceptions::PyTypeError, prelude::*, pybacked::PyBackedStr, - sync::GILOnceCell, + sync::PyOnceLock, types::{PyBool, PyDict, PyType}, Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyErr, PyResult, Python, }; @@ -15,7 +16,6 @@ use pyo3_arrow::PyDataType; use rustc_hash::FxHashMap; use std::{collections::HashMap, ops::Deref, str::FromStr, sync::Arc}; -#[cfg(feature = "arrow")] mod array_ext { use pyo3::{intern, prelude::*, types::PyTuple}; use pyo3_arrow::PyArray; @@ -34,10 +34,11 @@ mod array_ext { } } -#[cfg(feature = "arrow")] -use {crate::core::entities::properties::prop::PropArray, array_ext::*, pyo3_arrow::PyArray}; +use crate::core::entities::properties::prop::PropArray; +use 
array_ext::*; +use pyo3_arrow::PyArray; -static DECIMAL_CLS: GILOnceCell> = GILOnceCell::new(); +static DECIMAL_CLS: PyOnceLock> = PyOnceLock::new(); fn get_decimal_cls(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { DECIMAL_CLS.import(py, "decimal", "Decimal") @@ -59,18 +60,45 @@ impl<'py> IntoPyObject<'py> for Prop { Prop::F64(f64) => f64.into_pyobject(py)?.into_any(), Prop::DTime(dtime) => dtime.into_pyobject(py)?.into_any(), Prop::NDTime(ndtime) => ndtime.into_pyobject(py)?.into_any(), - #[cfg(feature = "arrow")] - Prop::Array(blob) => { - if let Some(arr_ref) = blob.into_array_ref() { - PyArray::from_array_ref(arr_ref).into_pyarrow(py)? - } else { - py.None().into_bound(py) - } + Prop::I32(v) => v.into_pyobject(py)?.into_any(), + Prop::U32(v) => v.into_pyobject(py)?.into_any(), + Prop::F32(v) => v.into_pyobject(py)?.into_any(), + Prop::List(PropArray::Array(arr_ref)) => { + PyArray::from_array_ref(arr_ref).into_pyarrow(py)? } + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), + Prop::Decimal(d) => { + let decl_cls = get_decimal_cls(py)?; + decl_cls.call1((d.to_string(),))? + } + }) + } +} + +impl<'a, 'py: 'a> IntoPyObject<'py> for &'a Prop { + type Target = PyAny; + type Output = Bound<'py, PyAny>; + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + Ok(match self { + Prop::Str(s) => s.into_pyobject(py)?.into_any(), + Prop::Bool(bool) => bool.into_bound_py_any(py)?, + Prop::U8(u8) => u8.into_pyobject(py)?.into_any(), + Prop::U16(u16) => u16.into_pyobject(py)?.into_any(), + Prop::I64(i64) => i64.into_pyobject(py)?.into_any(), + Prop::U64(u64) => u64.into_pyobject(py)?.into_any(), + Prop::F64(f64) => f64.into_pyobject(py)?.into_any(), + Prop::DTime(dtime) => dtime.into_pyobject(py)?.into_any(), + Prop::NDTime(ndtime) => ndtime.into_pyobject(py)?.into_any(), Prop::I32(v) => v.into_pyobject(py)?.into_any(), Prop::U32(v) => v.into_pyobject(py)?.into_any(), Prop::F32(v) => v.into_pyobject(py)?.into_any(), - Prop::List(v) => v.deref().clone().into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::List(PropArray::Array(arr_ref)) => { + PyArray::from_array_ref(arr_ref.clone()).into_pyarrow(py)? + } + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), Prop::Decimal(d) => { let decl_cls = get_decimal_cls(py)?; @@ -80,81 +108,239 @@ impl<'py> IntoPyObject<'py> for Prop { } } -#[pyclass(name = "Prop", module = "raphtory")] +#[pyclass(name = "Prop", module = "raphtory", eq)] +#[derive(PartialEq)] pub struct PyProp(pub Prop); #[pymethods] impl PyProp { + /// Construct a `Prop` holding an unsigned 8-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn u8(value: u8) -> Self { PyProp(Prop::U8(value)) } + /// Construct a `Prop` holding an unsigned 16-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn u16(value: u16) -> Self { PyProp(Prop::U16(value)) } + /// Construct a `Prop` holding an unsigned 32-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn u32(value: u32) -> Self { PyProp(Prop::U32(value)) } + /// Construct a `Prop` holding an unsigned 64-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. 
+ /// + /// Returns: + /// Prop: #[staticmethod] pub fn u64(value: u64) -> Self { PyProp(Prop::U64(value)) } + /// Construct a `Prop` holding a signed 32-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn i32(value: i32) -> Self { PyProp(Prop::I32(value)) } + /// Construct a `Prop` holding a signed 64-bit integer. + /// + /// Arguments: + /// value (int): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn i64(value: i64) -> Self { PyProp(Prop::I64(value)) } + /// Construct a `Prop` holding a 32-bit float. + /// + /// Arguments: + /// value (float): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn f32(value: f32) -> Self { PyProp(Prop::F32(value)) } + /// Construct a `Prop` holding a 64-bit float. + /// + /// Arguments: + /// value (float): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn f64(value: f64) -> Self { PyProp(Prop::F64(value)) } + /// Construct a `Prop` holding a string. + /// + /// Arguments: + /// value (str): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn str(value: &str) -> Self { PyProp(Prop::str(value)) } + /// Construct a `Prop` holding a boolean. + /// + /// Arguments: + /// value (bool): the value to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn bool(value: bool) -> Self { PyProp(Prop::Bool(value)) } + /// Construct a `Prop` holding a list of values. + /// + /// Arguments: + /// values (list): the values to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn list(values: &Bound<'_, PyAny>) -> PyResult { let elems: Vec = values.extract()?; - Ok(PyProp(Prop::List(Arc::new(elems)))) + Ok(PyProp(Prop::list(elems))) } + /// Construct a `Prop` holding a string-keyed map of values. + /// + /// Arguments: + /// dict (dict[str, Any]): the map to wrap. + /// + /// Returns: + /// Prop: #[staticmethod] pub fn map(dict: Bound<'_, PyDict>) -> PyResult { let items: HashMap = dict.extract()?; - let mut map: FxHashMap = - FxHashMap::with_capacity_and_hasher(items.len(), Default::default()); - - for (k, v) in items { - map.insert(ArcStr::from(k), v); - } + let map: FxHashMap = items + .into_iter() + .map(|(k, v)| (ArcStr::from(k), v)) + .collect(); Ok(PyProp(Prop::Map(Arc::new(map)))) } + /// Construct a `Prop` holding a timezone-aware datetime (stored as UTC). + /// Naive datetimes are accepted and interpreted as UTC, matching the + /// convention used elsewhere in Raphtory's time inputs. + /// + /// Arguments: + /// value (datetime): a datetime. Naive datetimes are treated as UTC. + /// + /// Returns: + /// Prop: + #[staticmethod] + pub fn aware_datetime(value: &Bound<'_, PyAny>) -> PyResult { + if let Ok(dt) = value.extract::>() { + return Ok(PyProp(Prop::DTime(dt))); + } + if let Ok(naive) = value.extract::() { + return Ok(PyProp(Prop::DTime(naive.and_utc()))); + } + Err(PyTypeError::new_err(format!( + "Could not convert {value:?} to a datetime" + ))) + } + + /// Construct a `Prop` holding a naive (timezone-unaware) datetime. + /// + /// Arguments: + /// value (datetime): the value to wrap (any tz info is dropped). + /// + /// Returns: + /// Prop: + #[staticmethod] + pub fn naive_datetime(value: NaiveDateTime) -> Self { + PyProp(Prop::NDTime(value)) + } + + /// Construct a `Prop` holding an arbitrary-precision decimal. + /// + /// Arguments: + /// value (Decimal | str | int | float): the value to wrap. Strings must + /// parse as a decimal. 
Note that floats only have ~15-17 digits of + /// precision — pass a string or `decimal.Decimal` for higher precision. + /// + /// Returns: + /// Prop: + #[staticmethod] + pub fn decimal(value: &Bound<'_, PyAny>) -> PyResult { + let bd = if value.get_type().name()?.contains("Decimal")? { + // decimal.Decimal — go via its str representation for full precision. + let s = value.str()?.to_cow()?.into_owned(); + BigDecimal::from_str(&s) + .map_err(|_| PyTypeError::new_err(format!("Could not convert {s} to Decimal")))? + } else if let Ok(i) = value.extract::() { + BigDecimal::from(i) + } else if let Ok(u) = value.extract::() { + BigDecimal::from(u) + } else if let Ok(f) = value.extract::() { + BigDecimal::try_from(f) + .map_err(|_| PyTypeError::new_err(format!("Could not convert {f} to Decimal")))? + } else if let Ok(s) = value.extract::() { + BigDecimal::from_str(&s) + .map_err(|_| PyTypeError::new_err(format!("Could not convert {s} to Decimal")))? + } else { + return Err(PyTypeError::new_err(format!( + "Could not convert {:?} to Decimal", + value + ))); + }; + let prop = Prop::try_from_bd(bd) + .map_err(|_| PyTypeError::new_err(format!("Decimal too large: {value:?}")))?; + Ok(PyProp(prop)) + } + + /// Returns the `PropType` of the wrapped value. + /// + /// Returns: + /// PropType: pub fn dtype(&self) -> PropType { self.0.dtype() } @@ -162,11 +348,19 @@ impl PyProp { pub fn __repr__(&self) -> String { format!("{}", self.0) } + + fn __hash__(&self) -> u64 { + use std::hash::{DefaultHasher, Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + self.0.hash(&mut hasher); + hasher.finish() + } } // Manually implemented to make sure we don't end up with f32/i32/u32 from python ints/floats -impl<'source> FromPyObject<'source> for Prop { - fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult { +impl<'py> FromPyObject<'_, 'py> for Prop { + type Error = PyErr; + fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { if let Ok(pyref) = ob.extract::>() { return Ok(pyref.0.clone()); } @@ -213,15 +407,12 @@ impl<'source> FromPyObject<'source> for Prop { if let Ok(s) = ob.extract::() { return Ok(Prop::Str(s.into())); } - - #[cfg(feature = "arrow")] if let Ok(arrow) = ob.extract::() { let (arr, _) = arrow.into_inner(); - return Ok(Prop::Array(PropArray::Array(arr))); + return Ok(Prop::List(PropArray::Array(arr))); } - - if let Ok(list) = ob.extract() { - return Ok(Prop::List(Arc::new(list))); + if let Ok(list) = ob.extract::>() { + return Ok(Prop::List(PropArray::Vec(list.into()))); } if let Ok(map) = ob.extract() { @@ -229,8 +420,9 @@ impl<'source> FromPyObject<'source> for Prop { } Err(PyTypeError::new_err(format!( - "Could not convert {:?} to Prop", - ob + "Could not convert {:?} of type {:?} to Prop", + ob, + ob.get_type() ))) } } @@ -244,81 +436,151 @@ pub struct PyPropType(pub PropType); #[pymethods] impl PyPropType { + /// Unsigned 8-bit integer type. + /// + /// Returns: + /// PropType: #[staticmethod] pub fn u8() -> PropType { PropType::U8 } + /// Unsigned 16-bit integer type. + /// + /// Returns: + /// PropType: #[staticmethod] pub fn u16() -> PropType { PropType::U16 } + /// Unsigned 32-bit integer type. + /// + /// Returns: + /// PropType: #[staticmethod] pub fn u32() -> PropType { PropType::U32 } + /// Unsigned 64-bit integer type. + /// + /// Returns: + /// PropType: #[staticmethod] pub fn u64() -> PropType { PropType::U64 } + /// Signed 32-bit integer type. + /// + /// Returns: + /// PropType: #[staticmethod] pub fn i32() -> PropType { PropType::I32 } + /// Signed 64-bit integer type. 
@@ -244,81 +436,151 @@ pub struct PyPropType(pub PropType);
 
 #[pymethods]
 impl PyPropType {
+    /// Unsigned 8-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn u8() -> PropType {
         PropType::U8
     }
 
+    /// Unsigned 16-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn u16() -> PropType {
         PropType::U16
     }
 
+    /// Unsigned 32-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn u32() -> PropType {
         PropType::U32
     }
 
+    /// Unsigned 64-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn u64() -> PropType {
         PropType::U64
     }
 
+    /// Signed 32-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn i32() -> PropType {
         PropType::I32
     }
 
+    /// Signed 64-bit integer type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn i64() -> PropType {
         PropType::I64
     }
 
+    /// 32-bit float type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn f32() -> PropType {
         PropType::F32
     }
 
+    /// 64-bit float type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn f64() -> PropType {
         PropType::F64
     }
 
+    /// String type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn str() -> PropType {
         PropType::Str
     }
 
+    /// Boolean type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn bool() -> PropType {
         PropType::Bool
     }
 
+    /// Naive datetime type (timezone-unaware).
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn naive_datetime() -> PropType {
         PropType::NDTime
     }
 
+    /// Datetime type (timezone-aware).
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn datetime() -> PropType {
         PropType::DTime
     }
 
+    /// Arbitrary-precision decimal type with a fixed scale (number of digits
+    /// after the decimal point).
+    ///
+    /// Arguments:
+    ///     scale (int): the number of digits after the decimal point.
+    ///
+    /// Returns:
+    ///     PropType:
+    #[staticmethod]
+    pub fn decimal(scale: i64) -> PropType {
+        PropType::Decimal { scale }
+    }
+
+    /// List type with a single element type.
+    ///
+    /// Arguments:
+    ///     p (PropType): element type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn list(p: PropType) -> PropType {
         PropType::List(Box::new(p))
     }
 
+    /// Map type with string keys and typed values.
+    ///
+    /// Arguments:
+    ///     hash_map (dict[str, PropType]): mapping from key name to value type.
+    ///
+    /// Returns:
+    ///     PropType:
     #[staticmethod]
     pub fn map(hash_map: HashMap<ArcStr, PropType>) -> PropType {
         PropType::Map(Arc::new(hash_map))
     }
 
-    #[staticmethod]
-    pub fn array(p: PropType) -> PropType {
-        PropType::Array(Box::new(p))
-    }
-
     fn __repr__(&self) -> String {
         format!("PropType.{}", self.0)
     }
@@ -342,9 +604,10 @@ impl<'py> IntoPyObject<'py> for PropType {
     }
 }
 
-impl<'source> FromPyObject<'source> for PropType {
-    fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult<Self> {
-        if let Ok(prop_type) = ob.downcast::<PyPropType>() {
+impl<'source> FromPyObject<'_, 'source> for PropType {
+    type Error = PyErr;
+    fn extract(ob: Borrowed<'_, 'source, PyAny>) -> PyResult<Self> {
+        if let Ok(prop_type) = ob.cast::<PyPropType>() {
             Ok(prop_type.get().0.clone())
         } else if let Ok(prop_type_str) = ob.extract::<String>() {
             match prop_type_str.deref().to_ascii_lowercase().as_str() {
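With `decimal`, `list` and `map` joining the existing scalar builders, nested property schemas are described purely by composition. A sketch of how the shapes visible in the patch (`Decimal { scale }`, `List(Box<..>)`, `Map(Arc<..>)`) nest; the enum below is a stand-in with an assumed `String` key type, not raphtory's actual definition:

    use std::{collections::HashMap, sync::Arc};

    // Stand-in mirroring the variant shapes shown in the patch.
    #[derive(Debug, Clone)]
    enum PropType {
        I64,
        Str,
        Decimal { scale: i64 },
        List(Box<PropType>),
        Map(Arc<HashMap<String, PropType>>),
    }

    fn main() {
        // {"amount": Decimal(scale=2), "tags": list[str], "count": int}
        let schema = PropType::Map(Arc::new(HashMap::from([
            ("amount".to_string(), PropType::Decimal { scale: 2 }),
            ("tags".to_string(), PropType::List(Box::new(PropType::Str))),
            ("count".to_string(), PropType::I64),
        ])));
        println!("{schema:?}");
    }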
diff --git a/raphtory-api/src/python/timeindex.rs b/raphtory-api/src/python/timeindex.rs
index 1dcb3774a2..067bcfe6ed 100644
--- a/raphtory-api/src/python/timeindex.rs
+++ b/raphtory-api/src/python/timeindex.rs
@@ -24,9 +24,10 @@ impl<'py> IntoPyObject<'py> for EventTime {
     }
 }
 
-impl<'source> FromPyObject<'source> for EventTime {
-    fn extract_bound(time: &Bound<'source, PyAny>) -> PyResult<Self> {
-        InputTime::extract_bound(time).map(|input_time| input_time.as_time())
+impl<'source> FromPyObject<'_, 'source> for EventTime {
+    type Error = PyErr;
+    fn extract(time: Borrowed<'_, 'source, PyAny>) -> PyResult<Self> {
+        InputTime::extract(time).map(|input_time| input_time.as_time())
     }
 }
 
@@ -55,13 +56,14 @@ impl EventTimeComponent {
     }
 }
 
-impl<'source> FromPyObject<'source> for EventTimeComponent {
-    fn extract_bound(component: &Bound<'source, PyAny>) -> PyResult<Self> {
+impl<'source> FromPyObject<'_, 'source> for EventTimeComponent {
+    type Error = PyErr;
+    fn extract(component: Borrowed<'_, 'source, PyAny>) -> PyResult<Self> {
         extract_time_index_component(component).map_err(|e| match e {
             ParsingError::Matched(err) => err,
             ParsingError::Unmatched => {
                 let message = format!(
-                    "Time component '{component}' must be a str, datetime, float, or an integer."
+                    "Time component '{component:?}' must be a str, datetime, float, or an integer."
                 );
                 PyTypeError::new_err(message)
             }
@@ -73,8 +75,8 @@ enum ParsingError {
     Unmatched,
 }
 
-fn extract_time_index_component<'source>(
-    component: &Bound<'source, PyAny>,
+fn extract_time_index_component(
+    component: Borrowed<'_, '_, PyAny>,
 ) -> Result<EventTimeComponent, ParsingError> {
     if let Ok(string) = component.extract::<String>() {
         let timestamp = string.as_str();
@@ -108,7 +110,7 @@ fn extract_time_index_component<'source>(
             parsed_datetime.and_utc().timestamp_millis(),
         ));
     }
-    if let Ok(py_datetime) = component.downcast::<PyDateTime>() {
+    if let Ok(py_datetime) = component.cast::<PyDateTime>() {
         let time = (py_datetime
             .call_method0("timestamp")
             .map_err(ParsingError::Matched)?
@@ -129,7 +131,7 @@ fn extract_time_index_component<'source>(
             naive_dt.and_utc().timestamp_millis(),
         ));
     }
-    if let Ok(py_date) = component.downcast::<PyDate>() {
+    if let Ok(py_date) = component.cast::<PyDate>() {
         let year: i32 = py_date.get_year();
         let month: u32 = py_date.get_month() as u32;
         let day: u32 = py_date.get_day() as u32;
@@ -154,7 +156,7 @@ fn extract_time_index_component<'source>(
 }
 
 fn parse_email_timestamp(timestamp: &str) -> PyResult<i64> {
-    Python::with_gil(|py| {
+    Python::attach(|py| {
         let email_utils = PyModule::import(py, "email.utils")?;
         let datetime = email_utils.call_method1("parsedate_to_datetime", (timestamp,))?;
         let py_seconds = datetime.call_method1("timestamp", ())?;
@@ -163,8 +165,8 @@ fn parse_email_timestamp(timestamp: &str) -> PyResult<i64> {
     })
 }
 
-/// Raphtory’s EventTime.
-/// Represents a unique timepoint in the graph’s history as (timestamp, event_id).
+/// Raphtory's EventTime.
+/// Represents a unique timepoint in the graph's history as (timestamp, event_id).
 ///
 /// - timestamp: Number of milliseconds since the Unix epoch.
 /// - event_id: ID used for ordering between equal timestamps.
@@ -174,6 +176,10 @@ fn parse_email_timestamp(timestamp: &str) -> PyResult<i64> {
 /// EventTime can be converted into a timestamp or a Python datetime, and compared
 /// either by timestamp (against ints/floats/datetimes/strings), by tuple of (timestamp, event_id),
 /// or against another EventTime.
+///
+/// Arguments:
+///     timestamp (int | float | datetime | str): A time input convertible to an EventTime.
+///     event_id (int | float | datetime | str | None): Optionally, specify the event id. Defaults to None.
 #[pyclass(name = "EventTime", module = "raphtory", frozen)]
 #[derive(Debug, Clone, Copy, Serialize, PartialEq, Ord, PartialOrd, Eq)]
 pub struct PyEventTime {
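The ordering contract stated in the doc comment above is exactly Rust's derived lexicographic tuple ordering (note `Ord` in the derive list): timestamps compare first and `event_id` only breaks ties. A tiny self-contained illustration:

    fn main() {
        let a = (100i64, 0usize); // (timestamp_ms, event_id)
        let b = (100i64, 1usize); // same timestamp, later event
        let c = (101i64, 0usize);
        assert!(a < b); // tie on timestamp broken by event_id
        assert!(b < c); // earlier timestamp always wins
        assert_eq!([c, b, a].iter().min(), Some(&a));
    }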
@@ -365,7 +371,7 @@ impl PyOptionalEventTime {
     /// Returns the timestamp in milliseconds since the Unix epoch if an EventTime is contained, or else None.
     ///
     /// Returns:
-    ///     int | None: Milliseconds since the Unix epoch.
+    ///     Optional[int]: Milliseconds since the Unix epoch.
     #[getter]
     pub fn t(&self) -> Option<i64> {
         self.inner.map(|t| t.t())
     }
@@ -374,7 +380,7 @@ impl PyOptionalEventTime {
     /// Returns the UTC datetime representation of this EventTime's timestamp if an EventTime is contained, or else None.
     ///
     /// Returns:
-    ///     datetime | None: The UTC datetime.
+    ///     Optional[datetime]: The UTC datetime.
     ///
     /// Raises:
     ///     TimeError: Returns TimeError on timestamp conversion errors (e.g. out-of-range timestamp).
@@ -386,7 +392,7 @@ impl PyOptionalEventTime {
     /// Returns the event id used to order events within the same timestamp if an EventTime is contained, or else None.
     ///
     /// Returns:
-    ///     int | None: The event id.
+    ///     Optional[int]: The event id.
     #[getter]
     pub fn event_id(&self) -> Option<usize> {
         self.inner.map(|t| t.i())
     }
@@ -411,7 +417,7 @@ impl PyOptionalEventTime {
     /// Returns the contained EventTime if it exists, or else None.
     ///
     /// Returns:
-    ///     EventTime | None:
+    ///     Optional[EventTime]:
     pub fn get_event_time(&self) -> Option<EventTime> {
         self.inner
     }
@@ -419,7 +425,7 @@ impl PyOptionalEventTime {
     /// Return this entry as a tuple of (timestamp, event_id), where the timestamp is in milliseconds if an EventTime is contained, or else None.
     ///
     /// Returns:
-    ///     tuple[int,int] | None: (timestamp, event_id).
+    ///     Optional[tuple[int, int]]: (timestamp, event_id).
     #[getter]
     pub fn as_tuple(&self) -> Option<(i64, usize)> {
         self.inner.map(|t| t.as_tuple())
     }
@@ -504,9 +510,9 @@ impl From<PyOptionalEventTime> for Option<EventTime> {
     }
 }
 
-impl<'source> FromPyObject<'source> for InputTime {
-    fn extract_bound(input: &Bound<'source, PyAny>) -> PyResult<Self> {
-        if let Ok(py_time) = input.downcast::<PyEventTime>() {
+impl<'source> FromPyObject<'_, 'source> for InputTime {
+    type Error = PyErr;
+    fn extract(input: Borrowed<'_, 'source, PyAny>) -> PyResult<Self> {
+        if let Ok(py_time) = input.cast::<PyEventTime>() {
             return Ok(py_time.get().try_into_input_time()?);
         } else if let Ok(opt_py_time) = input.extract::<PyOptionalEventTime>() {
             return match opt_py_time.inner {
@@ -515,9 +522,9 @@ impl<'source> FromPyObject<'source> for InputTime {
             };
         }
         // Handle list/tuple case: [timestamp, event_id]
-        if input.downcast::<PyList>().is_ok() || input.downcast::<PyTuple>().is_ok() {
+        if input.cast::<PyList>().is_ok() || input.cast::<PyTuple>().is_ok() {
             let py = input.py();
-            if let Ok(items) = input.extract::<Vec<PyObject>>() {
+            if let Ok(items) = input.extract::<Vec<Py<PyAny>>>() {
                 let len = items.len();
                 if len != 2 {
                     return Err(PyTypeError::new_err(format!(
@@ -525,19 +532,19 @@ impl<'source> FromPyObject<'source> for InputTime {
                         len
                     )));
                 }
-                let first = items[0].bind(py);
-                let second = items[1].bind(py);
+                let first = items[0].bind_borrowed(py);
+                let second = items[1].bind_borrowed(py);
                 let first_entry = extract_time_index_component(first).map_err(|e| match e {
                     ParsingError::Matched(err) => err,
                     ParsingError::Unmatched => {
-                        let message = format!("Time component '{first}' must be a str, datetime, float, or an integer.");
+                        let message = format!("Time component '{first:?}' must be a str, datetime, float, or an integer.");
                         PyTypeError::new_err(message)
                     }
                 })?;
                 let second_entry = extract_time_index_component(second).map_err(|e| match e {
                     ParsingError::Matched(err) => err,
                     ParsingError::Unmatched => {
-                        let message = format!("Time component '{second}' must be a str, datetime, float, or an integer.");
+                        let message = format!("Time component '{second:?}' must be a str, datetime, float, or an integer.");
                         PyTypeError::new_err(message)
                     }
                 })?;
@@ -552,7 +559,7 @@ impl<'source> FromPyObject<'source> for InputTime {
             Ok(component) => Ok(InputTime::Simple(component.t())),
             Err(ParsingError::Matched(err)) => Err(err),
             Err(ParsingError::Unmatched) => {
-                let message = format!("Time '{input}' must be a str, datetime, float, integer, or a tuple/list of two of those types.");
+                let message = format!("Time '{input:?}' must be a str, datetime, float, integer, or a tuple/list of two of those types.");
                 Err(PyTypeError::new_err(message))
             }
         }
diff --git a/raphtory-auth-noop/Cargo.toml b/raphtory-auth-noop/Cargo.toml
new file mode 100644
index 0000000000..7b20f3a52c
--- /dev/null
+++ b/raphtory-auth-noop/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "raphtory-auth-noop"
+version.workspace = true
+edition.workspace = true
+
+[dependencies]
diff --git a/raphtory-auth-noop/src/lib.rs
b/raphtory-auth-noop/src/lib.rs new file mode 100644 index 0000000000..12cd021c75 --- /dev/null +++ b/raphtory-auth-noop/src/lib.rs @@ -0,0 +1 @@ +pub fn init() {} diff --git a/raphtory-benchmark/Cargo.toml b/raphtory-benchmark/Cargo.toml index 0971dbf88f..aa53e69535 100644 --- a/raphtory-benchmark/Cargo.toml +++ b/raphtory-benchmark/Cargo.toml @@ -9,7 +9,6 @@ edition = "2021" criterion = { workspace = true } raphtory = { workspace = true, features = [ "io", - "proto", "vectors", ] } raphtory-api = { workspace = true } @@ -61,15 +60,12 @@ harness = false [[bench]] name = "proto_encode" harness = false +required-features = ["proto"] [[bench]] name = "proto_decode" harness = false - -[[bench]] -name = "arrow_algobench" -harness = false -required-features = ["storage"] +required-features = ["proto"] [[bench]] name = "search_bench" @@ -87,4 +83,4 @@ required-features = ["search"] [features] search = ["raphtory/search"] -storage = ["raphtory/storage"] +proto = ["raphtory/proto"] diff --git a/raphtory-benchmark/benches/arrow_algobench.rs b/raphtory-benchmark/benches/arrow_algobench.rs deleted file mode 100644 index c50db642bf..0000000000 --- a/raphtory-benchmark/benches/arrow_algobench.rs +++ /dev/null @@ -1,181 +0,0 @@ -use criterion::{criterion_group, criterion_main}; - -#[cfg(feature = "storage")] -pub mod arrow_bench { - use criterion::{black_box, BenchmarkId, Criterion, SamplingMode}; - use raphtory::{ - algorithms::{ - centrality::pagerank::unweighted_page_rank, - components::weakly_connected_components, - metrics::clustering_coefficient::{ - global_clustering_coefficient::global_clustering_coefficient, - local_clustering_coefficient::local_clustering_coefficient, - }, - motifs::local_triangle_count::local_triangle_count, - }, - graphgen::random_attachment::random_attachment, - prelude::*, - }; - use raphtory_benchmark::common::bench; - use rayon::prelude::*; - use tempfile::TempDir; - - pub fn local_triangle_count_analysis(c: &mut Criterion) { - let mut group = c.benchmark_group("local_triangle_count"); - group.sample_size(10); - bench(&mut group, "local_triangle_count", None, |b| { - let g = raphtory::graph_loader::lotr_graph::lotr_graph(); - let test_dir = TempDir::new().unwrap(); - let g = g.persist_as_disk_graph(test_dir.path()).unwrap(); - let windowed_graph = g.window(i64::MIN, i64::MAX); - - b.iter(|| { - let node_ids = windowed_graph.nodes().collect(); - - node_ids.into_par_iter().for_each(|v| { - local_triangle_count(&windowed_graph, v).unwrap(); - }); - }) - }); - - group.finish(); - } - - pub fn local_clustering_coefficient_analysis(c: &mut Criterion) { - let mut group = c.benchmark_group("local_clustering_coefficient"); - - bench(&mut group, "local_clustering_coefficient", None, |b| { - let g: Graph = Graph::new(); - - let vs = vec![ - (1, 2, 1), - (1, 3, 2), - (1, 4, 3), - (3, 1, 4), - (3, 4, 5), - (3, 5, 6), - (4, 5, 7), - (5, 6, 8), - (5, 8, 9), - (7, 5, 10), - (8, 5, 11), - (1, 9, 12), - (9, 1, 13), - (6, 3, 14), - (4, 8, 15), - (8, 3, 16), - (5, 10, 17), - (10, 5, 18), - (10, 8, 19), - (1, 11, 20), - (11, 1, 21), - (9, 11, 22), - (11, 9, 23), - ]; - - for (src, dst, t) in &vs { - g.add_edge(*t, *src, *dst, NO_PROPS, None).unwrap(); - } - - let test_dir = TempDir::new().unwrap(); - let g = g.persist_as_disk_graph(test_dir.path()).unwrap(); - - let windowed_graph = g.window(0, 5); - b.iter(|| local_clustering_coefficient(&windowed_graph, 1)) - }); - - group.finish(); - } - - pub fn graphgen_large_clustering_coeff(c: &mut Criterion) { - let mut group = 
c.benchmark_group("graphgen_large_clustering_coeff"); - // generate graph - let graph = Graph::new(); - let seed: [u8; 32] = [1; 32]; - random_attachment(&graph, 500000, 4, Some(seed)); - - let test_dir = TempDir::new().unwrap(); - let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap(); - - group.sampling_mode(SamplingMode::Flat); - group.measurement_time(std::time::Duration::from_secs(60)); - group.sample_size(10); - group.bench_with_input( - BenchmarkId::new("graphgen_large_clustering_coeff", &graph), - &graph, - |b, graph| { - b.iter(|| { - let result = global_clustering_coefficient(graph); - black_box(result); - }); - }, - ); - group.finish() - } - - pub fn graphgen_large_pagerank(c: &mut Criterion) { - let mut group = c.benchmark_group("graphgen_large_pagerank"); - // generate graph - let graph = Graph::new(); - let seed: [u8; 32] = [1; 32]; - random_attachment(&graph, 500000, 4, Some(seed)); - - let test_dir = TempDir::new().unwrap(); - let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap(); - group.sampling_mode(SamplingMode::Flat); - group.measurement_time(std::time::Duration::from_secs(20)); - group.sample_size(10); - group.bench_with_input( - BenchmarkId::new("graphgen_large_pagerank", &graph), - &graph, - |b, graph| { - b.iter(|| { - let result = unweighted_page_rank(graph, Some(100), None, None, true, None); - black_box(result); - }); - }, - ); - group.finish() - } - - pub fn graphgen_large_concomp(c: &mut Criterion) { - let mut group = c.benchmark_group("graphgen_large_concomp"); - // generate graph - let graph = Graph::new(); - let seed: [u8; 32] = [1; 32]; - random_attachment(&graph, 500000, 4, Some(seed)); - let test_dir = TempDir::new().unwrap(); - let graph = graph.persist_as_disk_graph(test_dir.path()).unwrap(); - - group.sampling_mode(SamplingMode::Flat); - group.measurement_time(std::time::Duration::from_secs(60)); - group.sample_size(10); - group.bench_with_input( - BenchmarkId::new("graphgen_large_concomp", &graph), - &graph, - |b, graph| { - b.iter(|| { - let result = weakly_connected_components(graph); - black_box(result); - }); - }, - ); - group.finish() - } -} - -#[cfg(feature = "storage")] -pub use arrow_bench::*; - -#[cfg(feature = "storage")] -criterion_group!( - benches, - local_triangle_count_analysis, - local_clustering_coefficient_analysis, - graphgen_large_clustering_coeff, - graphgen_large_pagerank, - graphgen_large_concomp, -); - -#[cfg(feature = "storage")] -criterion_main!(benches); diff --git a/raphtory-benchmark/benches/edge_add.rs b/raphtory-benchmark/benches/edge_add.rs index ff88954ef1..91de49089e 100644 --- a/raphtory-benchmark/benches/edge_add.rs +++ b/raphtory-benchmark/benches/edge_add.rs @@ -1,12 +1,12 @@ use criterion::{criterion_group, criterion_main, Criterion}; use rand::{ - distributions::{Alphanumeric, DistString}, - thread_rng, Rng, + distr::{Alphanumeric, SampleString}, + rng, Rng, }; use raphtory::prelude::*; fn random_string(n: usize) -> String { - Alphanumeric.sample_string(&mut thread_rng(), n) + Alphanumeric.sample_string(&mut rng(), n) } pub fn graph(c: &mut Criterion) { @@ -17,13 +17,13 @@ pub fn graph(c: &mut Criterion) { }); id_group.bench_function("numeric string input", |bencher| { - let id: u64 = thread_rng().gen(); + let id: u64 = rng().random(); let id_str = id.to_string(); bencher.iter(|| id_str.id()) }); id_group.bench_function("numeric input", |bencher| { - let id: u64 = thread_rng().gen(); + let id: u64 = rng().random(); bencher.iter(|| id.id()) }); @@ -33,7 +33,7 @@ pub fn graph(c: &mut 
Criterion) { graph_group.bench_function("string input", |bencher| { let src: String = random_string(16); let dst: String = random_string(16); - let t: i64 = thread_rng().gen(); + let t: i64 = rng().random(); bencher.iter(|| g.add_edge(t, src.clone(), dst.clone(), NO_PROPS, None)) }); graph_group.finish(); diff --git a/raphtory-benchmark/benches/index_bench.rs b/raphtory-benchmark/benches/index_bench.rs index 77fc7fb676..e1534c4bc4 100644 --- a/raphtory-benchmark/benches/index_bench.rs +++ b/raphtory-benchmark/benches/index_bench.rs @@ -27,7 +27,6 @@ fn bench_graph_index_load(c: &mut Criterion) { let mut group = c.benchmark_group("graph_index_load"); group.sample_size(100); - group.bench_function(BenchmarkId::from_parameter("load_once"), |b| { b.iter(|| Graph::decode(black_box(&path)).unwrap()); }); diff --git a/raphtory-benchmark/benches/search_bench.rs b/raphtory-benchmark/benches/search_bench.rs index 4212c07469..c877d43e06 100644 --- a/raphtory-benchmark/benches/search_bench.rs +++ b/raphtory-benchmark/benches/search_bench.rs @@ -1,9 +1,6 @@ use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use once_cell::sync::Lazy; -use rand::{ - seq::{IteratorRandom, SliceRandom}, - thread_rng, Rng, -}; +use rand::{prelude::IndexedRandom, rng, seq::IteratorRandom, Rng}; use raphtory::{ db::{ api::{ @@ -55,7 +52,7 @@ fn setup_graph() -> Arc { } fn get_random_node_names(graph: &Graph) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); iter::repeat_with(move || graph.nodes().into_iter().choose(&mut rng)) .filter_map(|opt| opt.map(|n| n.name().to_string())) .take(100) @@ -63,7 +60,7 @@ fn get_random_node_names(graph: &Graph) -> Vec { } fn get_random_edges_by_src_dst_names(graph: &Graph) -> Vec<(String, String)> { - let mut rng = thread_rng(); + let mut rng = rng(); iter::repeat_with(move || graph.edges().into_iter().choose(&mut rng)) .filter_map(|opt| opt.map(|e| (e.src().name().to_string(), e.dst().name().to_string()))) .take(100) @@ -202,15 +199,16 @@ where ::PropertyBuilder: PropertyFilterOps + InternalPropertyFilterBuilder>, { - let mut rng = thread_rng(); + let mut rng = rng(); match prop_value.dtype() { PropType::Str => { if let Some(full_str) = prop_value.into_str() { let tokens: Vec<&str> = full_str.split_whitespace().collect(); - if tokens.len() > 1 && rng.gen_bool(0.3) { - let start = rng.gen_range(0..tokens.len()); - let end = rng.gen_range(start..tokens.len()); + if tokens.len() > 1 && rng.random_bool(0.3) { + // 30% chance to use a random substring + let start = rng.random_range(0..tokens.len()); + let end = rng.random_range(start..tokens.len()); let sub_str = tokens[start..=end].join(" "); match filter_op { @@ -290,7 +288,7 @@ where // Get list of properties from multiple random nodes for IN, NOT_IN filters fn get_node_property_samples(graph: &Graph, prop_id: &usize, is_const: bool) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let node_names = get_random_node_names(graph); let mut samples = Vec::new(); @@ -306,7 +304,7 @@ fn get_node_property_samples(graph: &Graph, prop_id: &usize, is_const: bool) -> samples.push(prop_value); } - if samples.len() >= rng.gen_range(3..=5) { + if samples.len() >= rng.random_range(3..=5) { break; } } @@ -323,7 +321,7 @@ fn pick_node_property_filter( is_const: bool, filter_op: FilterOperator, ) -> Option> { - let mut rng = thread_rng(); + let mut rng = rng(); if let Some((prop_name, prop_id)) = props.choose(&mut rng) { let prop_value = if is_const { node.get_metadata(*prop_id) @@ -346,7 +344,7 @@ fn 
get_random_node_property_filters( graph: &Graph, filter_op: FilterOperator, ) -> Vec> { - let mut rng = thread_rng(); + let mut rng = rng(); let node_names = get_random_node_names(graph); let mut filters = Vec::new(); @@ -371,7 +369,7 @@ fn get_random_node_property_filters( // Fallback to other property list if one is empty i.e., if const properties are empty // fallback to temporal properties and vice versa. This ensures, we always have as many // property filters as there are nodes. - let choice = rng.gen_bool(0.5); + let choice = rng.random_bool(0.5); if choice { chosen_filter = pick_node_property_filter(graph, &node, &metadata, true, filter_op); if chosen_filter.is_none() { @@ -399,7 +397,7 @@ fn get_random_node_property_filters( // Get list of properties from multiple random edges for IN, NOT_IN filters fn get_edge_property_samples(graph: &Graph, prop_id: &usize, is_const: bool) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let edges = get_random_edges_by_src_dst_names(graph); let mut samples = Vec::new(); @@ -415,7 +413,7 @@ fn get_edge_property_samples(graph: &Graph, prop_id: &usize, is_const: bool) -> samples.push(prop_value); } - if samples.len() >= rng.gen_range(3..=5) { + if samples.len() >= rng.random_range(3..=5) { break; } } @@ -432,7 +430,7 @@ fn pick_edge_property_filter( is_const: bool, filter_op: FilterOperator, ) -> Option> { - let mut rng = thread_rng(); + let mut rng = rng(); if let Some((prop_name, prop_id)) = props.choose(&mut rng) { let prop_value = if is_const { @@ -456,7 +454,7 @@ fn get_random_edge_property_filters( graph: &Graph, filter_op: FilterOperator, ) -> Vec> { - let mut rng = thread_rng(); + let mut rng = rng(); let edges = get_random_edges_by_src_dst_names(graph); let mut filters = Vec::new(); @@ -481,7 +479,7 @@ fn get_random_edge_property_filters( // Fallback to other property list if one is empty i.e., if const properties are empty // fallback to temporal properties and vice versa. This ensures, we always have as many // property filters as there are edges. 
- let choice = rng.gen_bool(0.5); + let choice = rng.random_bool(0.5); if choice { chosen_filter = pick_edge_property_filter(graph, &edge, &metadata, true, filter_op); if chosen_filter.is_none() { @@ -683,7 +681,7 @@ fn bench_search_nodes_by_name(c: &mut Criterion) { fn bench_search_nodes_by_node_type(c: &mut Criterion) { let graph = setup_graph(); - let mut rng = thread_rng(); + let mut rng = rng(); let node_types = get_node_types(&graph); let sample_inputs: Vec<_> = (0..100) .map(|_| node_types.choose(&mut rng).unwrap().clone()) @@ -730,7 +728,7 @@ fn bench_search_nodes_by_composite_property_filter_and(c: &mut Criterion) { let graph = setup_graph(); let binding = get_random_node_property_filters(&graph, Eq); let property_filters = binding.iter().cloned(); - let mut rng = thread_rng(); + let mut rng = rng(); c.bench_function("bench_search_nodes_by_composite_property_filter_and", |b| { b.iter_batched( @@ -751,7 +749,7 @@ fn bench_search_nodes_by_composite_property_filter_or(c: &mut Criterion) { let graph = setup_graph(); let binding = get_random_node_property_filters(&graph, Eq); let property_filters = binding.iter().cloned(); - let mut rng = thread_rng(); + let mut rng = rng(); c.bench_function("bench_search_nodes_by_composite_property_filter_or", |b| { b.iter_batched( @@ -822,7 +820,7 @@ fn bench_search_edges_by_composite_property_filter_and(c: &mut Criterion) { let graph = setup_graph(); let binding = get_random_edge_property_filters(&graph, Eq); let property_filters = binding.iter().cloned(); - let mut rng = thread_rng(); + let mut rng = rng(); c.bench_function("bench_search_edges_by_composite_property_filter_and", |b| { b.iter_batched( @@ -843,7 +841,7 @@ fn bench_search_edges_by_composite_property_filter_or(c: &mut Criterion) { let graph = setup_graph(); let binding = get_random_edge_property_filters(&graph, Eq); let property_filters = binding.iter().cloned(); - let mut rng = thread_rng(); + let mut rng = rng(); c.bench_function("bench_search_edges_by_composite_property_filter_or", |b| { b.iter_batched( diff --git a/raphtory-benchmark/benches/tgraph_benchmarks.rs b/raphtory-benchmark/benches/tgraph_benchmarks.rs index 2595e88443..b2af702edc 100644 --- a/raphtory-benchmark/benches/tgraph_benchmarks.rs +++ b/raphtory-benchmark/benches/tgraph_benchmarks.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use rand::{distributions::Uniform, Rng}; +use rand::{distr::Uniform, Rng}; use raphtory::core::entities::nodes::structure::adjset::AdjSet; use sorted_vector_map::SortedVectorSet; use std::collections::BTreeSet; @@ -9,8 +9,8 @@ fn btree_set_u64(c: &mut Criterion) { for size in [10, 100, 300, 500, 1000].iter() { group.throughput(Throughput::Elements(*size as u64)); - let mut rng = rand::thread_rng(); - let range = Uniform::new(u64::MIN, u64::MAX); + let mut rng = rand::rng(); + let range = Uniform::new(u64::MIN, u64::MAX).unwrap(); let init_vals: Vec = (&mut rng).sample_iter(&range).take(*size).collect(); group.bench_with_input( @@ -49,8 +49,9 @@ fn bm_tadjset(c: &mut Criterion) { for size in [10, 100, 1000, 10_000, 100_000, 1_000_000].iter() { group.throughput(Throughput::Elements(*size as u64)); - let mut rng = rand::thread_rng(); - let range = Uniform::new(0, size * 10); + let mut rng = rand::rng(); + let range = Uniform::new(0, size * 10).unwrap(); + let init_srcs: Vec = (&mut rng) .sample_iter(&range) .take(*size as usize) @@ -59,7 +60,7 @@ fn bm_tadjset(c: &mut Criterion) { .sample_iter(&range) .take(*size as usize) .collect(); - 
let t_range = Uniform::new(1646838523i64, 1678374523); + let t_range = Uniform::new(1646838523i64, 1678374523).unwrap(); let init_time: Vec = (&mut rng) .sample_iter(&t_range) .take(*size as usize) diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index 539135265a..2745efabbb 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -5,22 +5,22 @@ pub mod vectors; use criterion::{ black_box, measurement::WallTime, BatchSize, Bencher, BenchmarkGroup, BenchmarkId, Criterion, }; -use rand::{distributions::Uniform, seq::*, Rng, SeedableRng}; +use rand::{distr::Uniform, seq::*, Rng, SeedableRng}; use raphtory::{db::api::view::StaticGraphViewOps, prelude::*}; use raphtory_api::core::{storage::timeindex::AsTime, utils::logging::global_info_logger}; -use std::collections::HashSet; +use std::{collections::HashSet, iter}; use tempfile::TempDir; use tracing::info; fn make_index_gen() -> Box> { - let rng = rand::thread_rng(); - let range = Uniform::new(u64::MIN, u64::MAX); + let rng = rand::rng(); + let range = Uniform::new(u64::MIN, u64::MAX).unwrap(); Box::new(rng.sample_iter(range)) } fn make_time_gen() -> Box> { - let rng = rand::thread_rng(); - let range = Uniform::new(i64::MIN, i64::MAX); + let rng = rand::rng(); + let range = Uniform::new(i64::MIN, i64::MAX).unwrap(); Box::new(rng.sample_iter(range)) } @@ -77,7 +77,7 @@ pub fn run_ingestion_benchmarks( |b: &mut Bencher| { b.iter_batched_ref( || (make_graph(), time_sample()), - |(g, t): &mut (Graph, i64)| g.add_node(*t, 0, NO_PROPS, None), + |(g, t): &mut (Graph, i64)| g.add_node(*t, 0, NO_PROPS, None, None), BatchSize::SmallInput, ) }, @@ -89,7 +89,7 @@ pub fn run_ingestion_benchmarks( |b: &mut Bencher| { b.iter_batched_ref( || (make_graph(), index_sample()), - |(g, v): &mut (Graph, u64)| g.add_node(0, *v, NO_PROPS, None), + |(g, v): &mut (Graph, u64)| g.add_node(0, *v, NO_PROPS, None, None), BatchSize::SmallInput, ) }, @@ -300,7 +300,7 @@ pub fn run_analysis_benchmarks( }); bench(group, "has_edge_existing", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let (src, dst) = edges .iter() .choose(&mut rng) @@ -313,7 +313,7 @@ pub fn run_analysis_benchmarks( "has_edge_nonexisting", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let edge = loop { let edge: (&GID, &GID) = ( nodes.iter().choose(&mut rng).expect("has_edge_nonexisting: non-empty graph (graph().nodes().id().iter() is empty)"), @@ -328,7 +328,7 @@ pub fn run_analysis_benchmarks( ); bench(group, "active edge", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let (edge, active_t) = edges_t .choose(&mut rng) .and_then(|(src, dst, t)| graph.edge(src, dst).map(|e| (e, t.t()))) @@ -344,7 +344,7 @@ pub fn run_analysis_benchmarks( }); bench(group, "edge has layer", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let edge = edges .iter() .choose(&mut rng) @@ -364,7 +364,7 @@ pub fn run_analysis_benchmarks( }); bench(group, "has_node_existing", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let v = nodes .iter() .choose(&mut rng) @@ -377,9 +377,9 @@ pub fn run_analysis_benchmarks( "has_node_nonexisting", parameter, |b: &mut Bencher| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let v: u64 = loop { - let v: u64 = rng.gen(); + let v: u64 = rng.random(); if 
!nodes.contains(&GID::U64(v)) { break v; } @@ -513,12 +513,12 @@ pub fn run_graph_ops_benches( // subgraph let mut rng = rand::rngs::StdRng::seed_from_u64(73); - let nodes = graph + let nodes = (&&graph) .nodes() .into_iter() - .choose_multiple(&mut rng, graph.count_nodes() / 10) + .choose_multiple(&mut rng, 1.max(graph.count_nodes() / 10)) .into_iter() - .map(|n| n.id()) + .flat_map(|n| iter::once(n.id()).chain(n.out_neighbours().id().next())) // at least one edge per node .collect::>(); let subgraph = graph.subgraph(nodes); let group_name = format!("{graph_name}_subgraph_10pc"); diff --git a/raphtory-benchmark/src/common/vectors.rs b/raphtory-benchmark/src/common/vectors.rs index f699fec569..75ca800aba 100644 --- a/raphtory-benchmark/src/common/vectors.rs +++ b/raphtory-benchmark/src/common/vectors.rs @@ -17,7 +17,7 @@ pub fn gen_embedding_for_bench(text: &str) -> Embedding { let hash = hasher.finish(); let mut rng: StdRng = SeedableRng::seed_from_u64(hash); - (0..1536).map(|_| rng.gen()).collect() + (0..1536).map(|_| rng.random()).collect() } async fn embedding_model(texts: Vec) -> EmbeddingResult> { @@ -30,7 +30,7 @@ async fn embedding_model(texts: Vec) -> EmbeddingResult> pub fn create_graph_for_vector_bench(size: usize) -> Graph { let graph = Graph::new(); for id in 0..size { - graph.add_node(0, id as u64, NO_PROPS, None).unwrap(); + graph.add_node(0, id as u64, NO_PROPS, None, None).unwrap(); } graph } diff --git a/raphtory-benchmark/src/graph_gen/raph_social.rs b/raphtory-benchmark/src/graph_gen/raph_social.rs index cd75f1277e..7cb4693ce5 100644 --- a/raphtory-benchmark/src/graph_gen/raph_social.rs +++ b/raphtory-benchmark/src/graph_gen/raph_social.rs @@ -15,7 +15,7 @@ use fake::{ }, Fake, }; -use rand::{prelude::SliceRandom, thread_rng, Rng}; +use rand::{rng, seq::IndexedRandom, Rng}; use raphtory::prelude::*; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{collections::HashMap, error::Error, fmt::Debug}; @@ -89,7 +89,7 @@ pub struct CommentPost { } fn gen_timestamp(rng: &mut impl Rng) -> i64 { - rng.gen_range(946684800000..1609459200000) // Random timestamp from 2000 to 2020 + rng.random_range(946684800000..1609459200000) // Random timestamp from 2000 to 2020 } pub fn generate_data_write_to_csv( @@ -101,7 +101,7 @@ pub fn generate_data_write_to_csv( ) -> Result<(), Box> { fs::create_dir_all(output_dir)?; - let mut rng = thread_rng(); + let mut rng = rng(); // Create writers for each file let mut people_writer = Writer::from_path(format!("{}/people.csv", output_dir))?; @@ -118,7 +118,7 @@ pub fn generate_data_write_to_csv( id: format!("person_{}", i), first_name: FirstName().fake(), last_name: LastName().fake(), - gender: if rng.gen_bool(0.5) { + gender: if rng.random_bool(0.5) { "male".to_string() } else { "female".to_string() @@ -141,14 +141,14 @@ pub fn generate_data_write_to_csv( // Person-Forum Relationships for i in 1..=num_people { - let membership_count = rng.gen_range(1..=3); + let membership_count = rng.random_range(1..=3); for _ in 0..membership_count { person_forum_writer.serialize(PersonForum { person_id: format!("person_{}", i), - forum_id: format!("forum_{}", rng.gen_range(1..=num_forums)), - is_moderator: rng.gen_bool(0.1), + forum_id: format!("forum_{}", rng.random_range(1..=num_forums)), + is_moderator: rng.random_bool(0.1), join_date: gen_timestamp(&mut rng), - activity_score: rng.gen_range(0.0..100.0), + activity_score: rng.random_range(0.0..100.0), })?; } } @@ -159,7 +159,7 @@ pub fn generate_data_write_to_csv( let creation_date = 
gen_timestamp(&mut rng); posts_writer.serialize(Post { id: format!("post_{}", i), - creator_id: format!("person_{}", rng.gen_range(1..=num_people)), + creator_id: format!("person_{}", rng.random_range(1..=num_people)), creation_date, location_ip: IP().fake(), browser_used: ["Chrome", "Firefox", "Safari", "Edge"] @@ -167,15 +167,15 @@ pub fn generate_data_write_to_csv( .unwrap() .to_string(), content: Sentence(5..15).fake(), - length: rng.gen_range(20..200), + length: rng.random_range(20..200), })?; post_forum_writer.serialize(PostForum { post_id: format!("post_{}", i), - forum_id: format!("forum_{}", rng.gen_range(1..=num_forums)), + forum_id: format!("forum_{}", rng.random_range(1..=num_forums)), creation_date, // Use post's creation date - is_featured: rng.gen_bool(0.2), - likes_count: rng.gen_range(0..500), - comments_count: rng.gen_range(0..200), + is_featured: rng.random_bool(0.2), + likes_count: rng.random_range(0..500), + comments_count: rng.random_range(0..200), })?; } posts_writer.flush()?; @@ -186,7 +186,7 @@ pub fn generate_data_write_to_csv( let creation_date = gen_timestamp(&mut rng); comments_writer.serialize(Comment { id: format!("comment_{}", i), - creator_id: format!("person_{}", rng.gen_range(1..=num_people)), + creator_id: format!("person_{}", rng.random_range(1..=num_people)), creation_date, location_ip: IP().fake(), browser_used: ["Chrome", "Firefox", "Safari", "Edge"] @@ -194,15 +194,15 @@ pub fn generate_data_write_to_csv( .unwrap() .to_string(), content: Sentence(5..15).fake(), - length: rng.gen_range(50..500), + length: rng.random_range(50..500), })?; comment_post_writer.serialize(CommentPost { comment_id: format!("comment_{}", i), - post_id: format!("post_{}", rng.gen_range(1..=num_posts)), + post_id: format!("post_{}", rng.random_range(1..=num_posts)), creation_date, // Use comment's creation date - is_edited: rng.gen_bool(0.1), - upvotes: rng.gen_range(0..200), - reply_count: rng.gen_range(0..20), + is_edited: rng.random_bool(0.1), + upvotes: rng.random_range(0..200), + reply_count: rng.random_range(0..20), })?; } comments_writer.flush()?; @@ -278,6 +278,7 @@ pub fn load_graph_save(data_dir: &str, output_dir: &str) -> Result Result Result Result Graph { - let mut rng = thread_rng(); + let mut rng = rng(); let graph = Graph::new(); // People @@ -408,6 +412,7 @@ pub fn generate_graph( person_id.clone(), NO_PROPS, Some("person"), + None, ) .expect("Failed to add person node") .add_metadata([ @@ -421,7 +426,7 @@ pub fn generate_graph( ), ( "gender", - Prop::Str(ArcStr::from(if rng.gen_bool(0.5) { + Prop::Str(ArcStr::from(if rng.random_bool(0.5) { "male" } else { "female" @@ -443,6 +448,7 @@ pub fn generate_graph( forum_id.clone(), NO_PROPS, Some("forum"), + None, ) .expect("Failed to add forum node") .add_metadata([( @@ -455,17 +461,17 @@ pub fn generate_graph( // Person Forum for i in 1..=num_people { let person_id = format!("person_{}", i); - let membership_count = rng.gen_range(1..=3); + let membership_count = rng.random_range(1..=3); for _ in 0..membership_count { - let forum_id = format!("forum_{}", rng.gen_range(1..=num_forums)); + let forum_id = format!("forum_{}", rng.random_range(1..=num_forums)); graph .add_edge( DateTime::from_timestamp(gen_timestamp(&mut rng), 0).unwrap(), person_id.clone(), forum_id.clone(), [ - ("activity_score", Prop::F64(rng.gen_range(0.0..100.0))), - ("is_moderator", Prop::Bool(rng.gen_bool(0.1))), + ("activity_score", Prop::F64(rng.random_range(0.0..100.0))), + ("is_moderator", Prop::Bool(rng.random_bool(0.1))), ], None, ) @@ 
-476,7 +482,7 @@ pub fn generate_graph( // Posts, Post Forum for i in 1..=num_posts { let post_id = format!("post_{}", i); - let creator_id = format!("person_{}", rng.gen_range(1..=num_people)); + let creator_id = format!("person_{}", rng.random_range(1..=num_people)); let creation_date = gen_timestamp(&mut rng); graph @@ -488,7 +494,7 @@ pub fn generate_graph( "content", Prop::Str(ArcStr::from(Sentence(5..15).fake::())), ), - ("length", Prop::U64(rng.gen_range(20..200))), + ("length", Prop::U64(rng.random_range(20..200))), ( "location_ip", Prop::Str(ArcStr::from(IP().fake::())), @@ -504,21 +510,22 @@ pub fn generate_graph( ), ], Some("post"), + None, ) .expect("Failed to add post node") .add_metadata([("creator_id", Prop::Str(ArcStr::from(creator_id.clone())))]) .expect("Failed to add post properties"); - let forum_id = format!("forum_{}", rng.gen_range(1..=num_forums)); + let forum_id = format!("forum_{}", rng.random_range(1..=num_forums)); graph .add_edge( DateTime::from_timestamp(creation_date, 0).unwrap(), post_id.clone(), forum_id.clone(), [ - ("is_featured", Prop::Bool(rng.gen_bool(0.2))), - ("likes_count", Prop::U64(rng.gen_range(0..500))), - ("comments_count", Prop::U64(rng.gen_range(0..200))), + ("is_featured", Prop::Bool(rng.random_bool(0.2))), + ("likes_count", Prop::U64(rng.random_range(0..500))), + ("comments_count", Prop::U64(rng.random_range(0..200))), ], None, ) @@ -528,7 +535,7 @@ pub fn generate_graph( // Comments, Comment Forum for i in 1..=num_comments { let comment_id = format!("comment_{}", i); - let creator_id = format!("person_{}", rng.gen_range(1..=num_people)); + let creator_id = format!("person_{}", rng.random_range(1..=num_people)); let creation_date = gen_timestamp(&mut rng); graph @@ -540,7 +547,7 @@ pub fn generate_graph( "content", Prop::Str(ArcStr::from(Sentence(5..15).fake::())), ), - ("length", Prop::U64(rng.gen_range(50..500))), + ("length", Prop::U64(rng.random_range(50..500))), ( "location_ip", Prop::Str(ArcStr::from(IP().fake::())), @@ -556,21 +563,22 @@ pub fn generate_graph( ), ], Some("comment"), + None, ) .expect("Failed to add comment node") .add_metadata([("creator_id", Prop::Str(ArcStr::from(creator_id.clone())))]) .expect("Failed to add comment properties"); - let post_id = format!("post_{}", rng.gen_range(1..=num_posts)); + let post_id = format!("post_{}", rng.random_range(1..=num_posts)); graph .add_edge( DateTime::from_timestamp(creation_date, 0).unwrap(), comment_id.clone(), post_id.clone(), [ - ("is_edited", Prop::Bool(rng.gen_bool(0.1))), - ("upvotes", Prop::U64(rng.gen_range(0..200))), - ("reply_count", Prop::U64(rng.gen_range(0..20))), + ("is_edited", Prop::Bool(rng.random_bool(0.1))), + ("upvotes", Prop::U64(rng.random_range(0..200))), + ("reply_count", Prop::U64(rng.random_range(0..20))), ], None, ) diff --git a/raphtory-core/Cargo.toml b/raphtory-core/Cargo.toml index 2b888a58c8..7e63a1bae3 100644 --- a/raphtory-core/Cargo.toml +++ b/raphtory-core/Cargo.toml @@ -14,7 +14,8 @@ edition.workspace = true [dependencies] raphtory-api = { workspace = true } -dashmap = { workspace = true } +dashmap = { workspace = true, features = ["raw-api"] } +hashbrown = { workspace = true } either = { workspace = true } serde = { workspace = true, features = ["derive"] } rustc-hash = { workspace = true } @@ -28,6 +29,10 @@ parking_lot = { workspace = true } itertools = { workspace = true } once_cell = { workspace = true } ouroboros = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-data = { workspace 
= true } +arrow-schema = { workspace = true } regex = { workspace = true } pyo3 = { workspace = true, optional = true } @@ -35,5 +40,4 @@ pyo3 = { workspace = true, optional = true } proptest = { workspace = true } [features] -arrow = ["raphtory-api/arrow"] python = ["dep:pyo3", "raphtory-api/python"] diff --git a/raphtory-core/src/entities/edges/edge_store.rs b/raphtory-core/src/entities/edges/edge_store.rs deleted file mode 100644 index d05a920f47..0000000000 --- a/raphtory-core/src/entities/edges/edge_store.rs +++ /dev/null @@ -1,178 +0,0 @@ -use crate::{ - entities::{ - properties::props::{MetadataError, Props, TPropError}, - EID, VID, - }, - storage::{ - raw_edges::EdgeShard, - timeindex::{EventTime, TimeIndex}, - }, - utils::iter::GenLockedIter, -}; -use itertools::Itertools; -use raphtory_api::core::entities::{edges::edge_ref::EdgeRef, properties::prop::Prop}; -use serde::{Deserialize, Serialize}; -use std::{ - fmt::{Debug, Formatter}, - ops::Deref, -}; - -#[derive(Clone, Serialize, Deserialize, Debug, Default, PartialEq)] -pub struct EdgeStore { - pub eid: EID, - pub src: VID, - pub dst: VID, -} - -pub trait EdgeDataLike<'a> { - fn temporal_prop_ids(self) -> impl Iterator + 'a; - fn metadata_ids(self) -> impl Iterator + 'a; -} - -impl<'a, T: Deref + 'a> EdgeDataLike<'a> for T { - fn temporal_prop_ids(self) -> impl Iterator + 'a { - GenLockedIter::from(self, |layer| { - Box::new( - layer - .props() - .into_iter() - .flat_map(|props| props.temporal_prop_ids()), - ) - }) - } - - fn metadata_ids(self) -> impl Iterator + 'a { - GenLockedIter::from(self, |layer| { - Box::new( - layer - .props() - .into_iter() - .flat_map(|props| props.metadata_ids()), - ) - }) - } -} - -#[derive(Serialize, Deserialize, Debug, Default, PartialEq)] -pub struct EdgeLayer { - props: Option, // memory optimisation: only allocate props if needed -} - -impl EdgeLayer { - pub fn props(&self) -> Option<&Props> { - self.props.as_ref() - } - - pub fn into_props(self) -> Option { - self.props - } - - pub fn add_prop(&mut self, t: EventTime, prop_id: usize, prop: Prop) -> Result<(), TPropError> { - let props = self.props.get_or_insert_with(Props::new); - props.add_prop(t, prop_id, prop) - } - - pub fn add_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - let props = self.props.get_or_insert_with(Props::new); - props.add_metadata(prop_id, prop) - } - - pub fn update_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - let props = self.props.get_or_insert_with(Props::new); - props.update_metadata(prop_id, prop) - } -} - -impl EdgeStore { - pub fn new(src: VID, dst: VID) -> Self { - Self { - eid: 0.into(), - src, - dst, - } - } - - pub fn initialised(&self) -> bool { - self.eid != EID::default() - } - - pub fn as_edge_ref(&self) -> EdgeRef { - EdgeRef::new_outgoing(self.eid, self.src, self.dst) - } -} - -#[derive(Clone, Copy)] -pub struct MemEdge<'a> { - edges: &'a EdgeShard, - offset: usize, -} - -impl<'a> Debug for MemEdge<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Edge") - .field("src", &self.src()) - .field("dst", &self.dst()) - .field("eid", &self.eid()) - .field( - "props", - &(0..self.internal_num_layers()) - .map(|i| (i, self.props(i))) - .collect_vec(), - ) - .finish() - } -} - -impl<'a> MemEdge<'a> { - pub fn new(edges: &'a EdgeShard, offset: usize) -> Self { - MemEdge { edges, offset } - } - - pub fn src(&self) -> VID { - self.edge_store().src - } - - pub fn dst(&self) -> VID { - self.edge_store().dst - } - 
pub fn edge_store(&self) -> &'a EdgeStore { - self.edges.edge_store(self.offset) - } - - #[inline] - pub fn props(self, layer_id: usize) -> Option<&'a Props> { - self.edges - .props(self.offset, layer_id) - .and_then(|el| el.props()) - } - - pub fn eid(self) -> EID { - self.edge_store().eid - } - - pub fn as_edge_ref(&self) -> EdgeRef { - EdgeRef::new_outgoing(self.eid(), self.src(), self.dst()) - } - - pub fn internal_num_layers(self) -> usize { - self.edges.internal_num_layers() - } - - pub fn get_additions(self, layer_id: usize) -> Option<&'a TimeIndex> { - self.edges.additions(self.offset, layer_id) - } - - pub fn get_deletions(self, layer_id: usize) -> Option<&'a TimeIndex> { - self.edges.deletions(self.offset, layer_id) - } - - pub fn has_layer_inner(self, layer_id: usize) -> bool { - self.get_additions(layer_id) - .filter(|t_index| !t_index.is_empty()) - .is_some() - || self - .get_deletions(layer_id) - .filter(|t_index| !t_index.is_empty()) - .is_some() - } -} diff --git a/raphtory-core/src/entities/edges/mod.rs b/raphtory-core/src/entities/edges/mod.rs deleted file mode 100644 index d1f7224234..0000000000 --- a/raphtory-core/src/entities/edges/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod edge_store; - -pub use raphtory_api::core::entities::edges::*; diff --git a/raphtory-core/src/entities/graph/logical_to_physical.rs b/raphtory-core/src/entities/graph/logical_to_physical.rs index cf4d1afe93..8b13789179 100644 --- a/raphtory-core/src/entities/graph/logical_to_physical.rs +++ b/raphtory-core/src/entities/graph/logical_to_physical.rs @@ -1,232 +1 @@ -use crate::{ - entities::nodes::node_store::NodeStore, - storage::{NodeSlot, UninitialisedEntry}, -}; -use dashmap::mapref::entry::Entry; -use either::Either; -use once_cell::sync::OnceCell; -use raphtory_api::core::{ - entities::{GidRef, GidType, VID}, - storage::{dict_mapper::MaybeNew, FxDashMap}, -}; -use serde::{Deserialize, Deserializer, Serialize}; -use std::hash::Hash; -use thiserror::Error; -#[derive(Debug, Deserialize, Serialize)] -enum Map { - U64(FxDashMap), - Str(FxDashMap), -} - -#[derive(Error, Debug)] -pub enum InvalidNodeId { - #[error("Node id {0} does not have the correct type, expected String")] - InvalidNodeIdU64(u64), - #[error("Node id {0} does not have the correct type, expected Numeric")] - InvalidNodeIdStr(String), -} - -impl Map { - fn as_u64(&self) -> Option<&FxDashMap> { - match self { - Map::U64(map) => Some(map), - _ => None, - } - } - - fn as_str(&self) -> Option<&FxDashMap> { - match self { - Map::Str(map) => Some(map), - _ => None, - } - } -} - -impl Default for Map { - fn default() -> Self { - Map::U64(FxDashMap::default()) - } -} - -#[derive(Debug, Default)] -pub struct Mapping { - map: OnceCell, -} - -impl Mapping { - pub fn dtype(&self) -> Option { - self.map.get().map(|map| match map { - Map::U64(_) => GidType::U64, - Map::Str(_) => GidType::Str, - }) - } - pub fn new() -> Self { - Mapping { - map: OnceCell::new(), - } - } - - pub fn set(&self, gid: GidRef, vid: VID) -> Result<(), InvalidNodeId> { - let map = self.map.get_or_init(|| match gid { - GidRef::U64(_) => Map::U64(FxDashMap::default()), - GidRef::Str(_) => Map::Str(FxDashMap::default()), - }); - match gid { - GidRef::U64(id) => { - map.as_u64() - .ok_or(InvalidNodeId::InvalidNodeIdU64(id))? 
- .insert(id, vid); - } - GidRef::Str(id) => { - let id = id.to_owned(); - match map.as_str() { - None => return Err(InvalidNodeId::InvalidNodeIdStr(id)), - Some(map) => { - map.insert(id, vid); - } - } - } - } - Ok(()) - } - - pub fn get_or_init( - &self, - gid: GidRef, - next_id: impl FnOnce() -> VID, - ) -> Result, InvalidNodeId> { - let map = self.map.get_or_init(|| match &gid { - GidRef::U64(_) => Map::U64(FxDashMap::default()), - GidRef::Str(_) => Map::Str(FxDashMap::default()), - }); - let vid = match gid { - GidRef::U64(id) => { - let map = map.as_u64().ok_or(InvalidNodeId::InvalidNodeIdU64(id))?; - match map.entry(id) { - Entry::Occupied(id) => MaybeNew::Existing(*id.get()), - Entry::Vacant(entry) => { - let vid = next_id(); - entry.insert(vid); - MaybeNew::New(vid) - } - } - } - GidRef::Str(id) => { - let map = map - .as_str() - .ok_or_else(|| InvalidNodeId::InvalidNodeIdStr(id.into()))?; - map.get(id) - .map(|vid| MaybeNew::Existing(*vid)) - .unwrap_or_else(|| match map.entry(id.to_owned()) { - Entry::Occupied(entry) => MaybeNew::Existing(*entry.get()), - Entry::Vacant(entry) => { - let vid = next_id(); - entry.insert(vid); - MaybeNew::New(vid) - } - }) - } - }; - Ok(vid) - } - - pub fn get_or_init_node<'a>( - &self, - gid: GidRef, - f_init: impl FnOnce() -> UninitialisedEntry<'a, NodeStore, NodeSlot>, - ) -> Result, InvalidNodeId> { - let map = self.map.get_or_init(|| match &gid { - GidRef::U64(_) => Map::U64(FxDashMap::default()), - GidRef::Str(_) => Map::Str(FxDashMap::default()), - }); - match gid { - GidRef::U64(id) => map - .as_u64() - .map(|m| get_or_new(m, id, f_init)) - .ok_or(InvalidNodeId::InvalidNodeIdU64(id)), - GidRef::Str(id) => map - .as_str() - .map(|m| optim_get_or_insert(m, id, f_init)) - .ok_or_else(|| InvalidNodeId::InvalidNodeIdStr(id.into())), - } - } - - #[inline] - pub fn get_str(&self, gid: &str) -> Option { - let map = self.map.get()?; - map.as_str().and_then(|m| m.get(gid).map(|id| *id)) - } - - #[inline] - pub fn get_u64(&self, gid: u64) -> Option { - let map = self.map.get()?; - map.as_u64().and_then(|m| m.get(&gid).map(|id| *id)) - } -} - -#[inline] -fn optim_get_or_insert<'a>( - m: &FxDashMap, - id: &str, - f_init: impl FnOnce() -> UninitialisedEntry<'a, NodeStore, NodeSlot>, -) -> MaybeNew { - m.get(id) - .map(|vid| MaybeNew::Existing(*vid)) - .unwrap_or_else(|| get_or_new(m, id.to_owned(), f_init)) -} - -#[inline] -fn get_or_new<'a, K: Eq + Hash>( - m: &FxDashMap, - id: K, - f_init: impl FnOnce() -> UninitialisedEntry<'a, NodeStore, NodeSlot>, -) -> MaybeNew { - let entry = match m.entry(id) { - Entry::Occupied(entry) => Either::Left(*entry.get()), - Entry::Vacant(entry) => { - // This keeps the underlying storage shard locked for deferred initialisation but - // allows unlocking the map again. - let node = f_init(); - entry.insert(node.value().vid); - Either::Right(node) - } - }; - match entry { - Either::Left(vid) => MaybeNew::Existing(vid), - Either::Right(node_entry) => { - let vid = node_entry.value().vid; - node_entry.init(); - MaybeNew::New(vid) - } - } -} - -impl<'de> Deserialize<'de> for Mapping { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - if let Some(map) = Option::::deserialize(deserializer)? 
{ - let once = OnceCell::with_value(map); - Ok(Mapping { map: once }) - } else { - Ok(Mapping { - map: OnceCell::new(), - }) - } - } -} - -impl Serialize for Mapping { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - if let Some(map) = self.map.get() { - Some(map).serialize(serializer) - } else { - serializer.serialize_none() - } - } -} diff --git a/raphtory-core/src/entities/graph/mod.rs b/raphtory-core/src/entities/graph/mod.rs index fc072dffdb..e16922dcc9 100644 --- a/raphtory-core/src/entities/graph/mod.rs +++ b/raphtory-core/src/entities/graph/mod.rs @@ -1,4 +1,3 @@ pub mod logical_to_physical; pub mod tgraph; -pub mod tgraph_storage; pub mod timer; diff --git a/raphtory-core/src/entities/graph/tgraph.rs b/raphtory-core/src/entities/graph/tgraph.rs index 8822c164ca..1182d97800 100644 --- a/raphtory-core/src/entities/graph/tgraph.rs +++ b/raphtory-core/src/entities/graph/tgraph.rs @@ -1,58 +1,7 @@ -use super::logical_to_physical::{InvalidNodeId, Mapping}; -use crate::{ - entities::{ - edges::edge_store::EdgeStore, - graph::{ - tgraph_storage::GraphStorage, - timer::{MaxCounter, MinCounter, TimeCounterTrait}, - }, - nodes::{node_ref::NodeRef, node_store::NodeStore}, - properties::graph_meta::GraphMeta, - LayerIds, EID, VID, - }, - storage::{ - raw_edges::EdgeWGuard, - timeindex::{AsTime, EventTime}, - NodeEntry, PairEntryMut, - }, -}; -use dashmap::DashSet; -use either::Either; -use raphtory_api::core::{ - entities::{ - properties::{meta::Meta, prop::Prop}, - GidRef, Layer, Multiple, MAX_LAYER, - }, - input::input_node::InputNode, - storage::{arc_str::ArcStr, dict_mapper::MaybeNew}, - Direction, -}; -use rustc_hash::FxHasher; -use serde::{Deserialize, Serialize}; -use std::{fmt::Debug, hash::BuildHasherDefault, sync::atomic::AtomicUsize}; +use raphtory_api::core::{entities::MAX_LAYER, storage::arc_str::ArcStr}; +use std::fmt::Debug; use thiserror::Error; -pub(crate) type FxDashSet = DashSet>; - -#[derive(Serialize, Deserialize, Debug)] -pub struct TemporalGraph { - pub storage: GraphStorage, - // mapping between logical and physical ids - pub logical_to_physical: Mapping, - string_pool: FxDashSet, - pub event_counter: AtomicUsize, - //earliest time seen in this graph - pub earliest_time: MinCounter, - //latest time seen in this graph - pub latest_time: MaxCounter, - // props meta data for nodes (mapping between strings and ids) - pub node_meta: Meta, - // props meta data for edges (mapping between strings and ids) - pub edge_meta: Meta, - // graph properties - pub graph_meta: GraphMeta, -} - #[derive(Error, Debug)] #[error("Invalid layer: {invalid_layer}. 
Valid layers: {valid_layers:?}")] pub struct InvalidLayer { @@ -72,278 +21,3 @@ impl InvalidLayer { } } } - -impl std::fmt::Display for TemporalGraph { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "Graph(num_nodes={}, num_edges={})", - self.storage.nodes_len(), - self.storage.edges_len() - ) - } -} - -impl Default for TemporalGraph { - fn default() -> Self { - Self::new(rayon::current_num_threads()) - } -} - -impl TemporalGraph { - pub fn new(num_locks: usize) -> Self { - TemporalGraph { - logical_to_physical: Mapping::new(), - string_pool: Default::default(), - storage: GraphStorage::new(num_locks), - event_counter: AtomicUsize::new(0), - earliest_time: MinCounter::new(), - latest_time: MaxCounter::new(), - node_meta: Meta::new(), - edge_meta: Meta::new(), - graph_meta: GraphMeta::new(), - } - } - - pub fn process_prop_value(&self, prop: &Prop) -> Prop { - match prop { - Prop::Str(value) => Prop::Str(self.resolve_str(value)), - _ => prop.clone(), - } - } - - fn get_valid_layers(edge_meta: &Meta) -> Vec { - edge_meta - .layer_meta() - .get_keys() - .iter() - .map(|x| x.to_string()) - .collect::>() - } - - pub fn num_layers(&self) -> usize { - self.edge_meta.layer_meta().len() - } - - pub fn resolve_node_inner(&self, id: NodeRef) -> Result, InvalidNodeId> { - match id { - NodeRef::External(id) => self.logical_to_physical.get_or_init_node(id, || { - let node_store = NodeStore::empty(id.into()); - self.storage.push_node(node_store) - }), - NodeRef::Internal(id) => Ok(MaybeNew::Existing(id)), - } - } - - /// map layer name to id and allocate a new layer if needed - pub fn resolve_layer_inner( - &self, - layer: Option<&str>, - ) -> Result, TooManyLayers> { - let id = self.edge_meta.get_or_create_layer_id(layer); - if let MaybeNew::New(id) = id { - if id > MAX_LAYER { - Err(TooManyLayers)?; - } - } - Ok(id) - } - - pub fn layer_ids(&self, key: Layer) -> Result { - match key { - Layer::None => Ok(LayerIds::None), - Layer::All => Ok(LayerIds::All), - Layer::Default => Ok(LayerIds::One(0)), - Layer::One(id) => match self.edge_meta.get_layer_id(&id) { - Some(id) => Ok(LayerIds::One(id)), - None => Err(InvalidLayer::new( - id, - Self::get_valid_layers(&self.edge_meta), - )), - }, - Layer::Multiple(ids) => { - let mut new_layers = ids - .iter() - .map(|id| { - self.edge_meta.get_layer_id(id).ok_or_else(|| { - InvalidLayer::new(id.clone(), Self::get_valid_layers(&self.edge_meta)) - }) - }) - .collect::, InvalidLayer>>()?; - let num_layers = self.num_layers(); - let num_new_layers = new_layers.len(); - if num_new_layers == 0 { - Ok(LayerIds::None) - } else if num_new_layers == 1 { - Ok(LayerIds::One(new_layers[0])) - } else if num_new_layers == num_layers { - Ok(LayerIds::All) - } else { - new_layers.sort_unstable(); - new_layers.dedup(); - Ok(LayerIds::Multiple(new_layers.into())) - } - } - } - } - - pub fn valid_layer_ids(&self, key: Layer) -> LayerIds { - match key { - Layer::None => LayerIds::None, - Layer::All => LayerIds::All, - Layer::Default => LayerIds::One(0), - Layer::One(id) => match self.edge_meta.get_layer_id(&id) { - Some(id) => LayerIds::One(id), - None => LayerIds::None, - }, - Layer::Multiple(ids) => { - let new_layers: Multiple = ids - .iter() - .flat_map(|id| self.edge_meta.get_layer_id(id)) - .collect(); - let num_layers = self.num_layers(); - let num_new_layers = new_layers.len(); - if num_new_layers == 0 { - LayerIds::None - } else if num_new_layers == 1 { - LayerIds::One(new_layers.get_id_by_index(0).unwrap()) - } else if num_new_layers 
- pub fn valid_layer_ids(&self, key: Layer) -> LayerIds { - match key { - Layer::None => LayerIds::None, - Layer::All => LayerIds::All, - Layer::Default => LayerIds::One(0), - Layer::One(id) => match self.edge_meta.get_layer_id(&id) { - Some(id) => LayerIds::One(id), - None => LayerIds::None, - }, - Layer::Multiple(ids) => { - let new_layers: Multiple = ids - .iter() - .flat_map(|id| self.edge_meta.get_layer_id(id)) - .collect(); - let num_layers = self.num_layers(); - let num_new_layers = new_layers.len(); - if num_new_layers == 0 { - LayerIds::None - } else if num_new_layers == 1 { - LayerIds::One(new_layers.get_id_by_index(0).unwrap()) - } else if num_new_layers == num_layers { - LayerIds::All - } else { - LayerIds::Multiple(new_layers) - } - } - } - } - - pub fn get_layer_name(&self, layer: usize) -> ArcStr { - self.edge_meta.get_layer_name_by_id(layer) - } - - #[inline] - pub fn graph_earliest_time(&self) -> Option<i64> { - Some(self.earliest_time.get()).filter(|t| *t != i64::MAX) - } - - #[inline] - pub fn graph_latest_time(&self) -> Option<i64> { - Some(self.latest_time.get()).filter(|t| *t != i64::MIN) - } - - #[inline] - pub fn internal_num_nodes(&self) -> usize { - self.storage.nodes.len() - } - - #[inline] - pub fn update_time(&self, time: EventTime) { - let t = time.t(); - self.earliest_time.update(t); - self.latest_time.update(t); - } - - pub(crate) fn link_nodes_inner( - &self, - node_pair: &mut PairEntryMut, - edge_id: EID, - t: EventTime, - layer: usize, - is_deletion: bool, - ) { - self.update_time(t); - let src_id = node_pair.get_i().vid; - let dst_id = node_pair.get_j().vid; - let src = node_pair.get_mut_i(); - let elid = if is_deletion { - edge_id.with_layer_deletion(layer) - } else { - edge_id.with_layer(layer) - }; - src.add_edge(dst_id, Direction::OUT, layer, edge_id); - src.update_time(t, elid); - let dst = node_pair.get_mut_j(); - dst.add_edge(src_id, Direction::IN, layer, edge_id); - dst.update_time(t, elid); - } - - pub fn link_edge( - &self, - eid: EID, - t: EventTime, - layer: usize, - is_deletion: bool, - ) -> EdgeWGuard<'_> { - let (src, dst) = { - let edge_r = self.storage.edges.get_edge(eid); - let edge_r = edge_r.as_mem_edge().edge_store(); - (edge_r.src, edge_r.dst) - }; - // need to get the node pair first to avoid deadlocks with link_nodes - let mut node_pair = self.storage.pair_node_mut(src, dst); - self.link_nodes_inner(&mut node_pair, eid, t, layer, is_deletion); - self.storage.edges.get_edge_mut(eid) - } - - pub fn link_nodes( - &self, - src_id: VID, - dst_id: VID, - t: EventTime, - layer: usize, - is_deletion: bool, - ) -> MaybeNew<EdgeWGuard<'_>> { - let edge = { - let mut node_pair = self.storage.pair_node_mut(src_id, dst_id); - let src = node_pair.get_i(); - let mut edge = match src.find_edge_eid(dst_id, &LayerIds::All) { - Some(edge_id) => Either::Left(self.storage.get_edge_mut(edge_id)), - None => Either::Right(self.storage.push_edge(EdgeStore::new(src_id, dst_id))), - }; - let eid = match edge.as_mut() { - Either::Left(edge) => edge.as_ref().eid(), - Either::Right(edge) => edge.value().eid, - }; - self.link_nodes_inner(&mut node_pair, eid, t, layer, is_deletion); - edge - }; - - match edge { - Either::Left(edge) => MaybeNew::Existing(edge), - Either::Right(edge) => { - let edge = edge.init(); - MaybeNew::New(edge) - } - } - } - - #[inline] - pub fn resolve_node_ref(&self, v: NodeRef) -> Option<VID> { - match v { - NodeRef::Internal(vid) => Some(vid), - NodeRef::External(GidRef::U64(gid)) => self.logical_to_physical.get_u64(gid), - NodeRef::External(GidRef::Str(string)) => self - .logical_to_physical - .get_str(string) - .or_else(|| self.logical_to_physical.get_u64(string.id())), - } - } -
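
resolve_node_ref above is the read-only counterpart of resolve_node_inner: a string key is tried verbatim first and then falls back to its numeric id via string.id(), presumably a stable hash of the name. A minimal stand-in for that two-step lookup, with toy types and a toy hash in place of the real Mapping and GidRef:

    use std::collections::HashMap;

    // Toy stand-in for the logical-to-physical mapping; `toy_id` plays the role
    // of `GidRef::Str::id()` and is NOT the hash raphtory actually uses.
    struct ToyMapping {
        by_str: HashMap<String, usize>,
        by_u64: HashMap<u64, usize>,
    }

    fn toy_id(s: &str) -> u64 {
        s.bytes()
            .fold(0u64, |h, b| h.wrapping_mul(31).wrapping_add(b as u64))
    }

    impl ToyMapping {
        fn resolve(&self, key: &str) -> Option<usize> {
            // exact string match first, then the numeric fallback
            self.by_str
                .get(key)
                .copied()
                .or_else(|| self.by_u64.get(&toy_id(key)).copied())
        }
    }
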
- - /// Checks if the same string value already exists and returns a pointer to the same existing value if it exists, - /// otherwise adds the string to the pool. - fn resolve_str(&self, value: &ArcStr) -> ArcStr { - match self.string_pool.get(value) { - Some(value) => value.clone(), - None => { - self.string_pool.insert(value.clone()); - self.string_pool - .get(value) - .expect("value should exist as inserted above") - .clone() - } - } - } - - pub fn node(&self, id: VID) -> NodeEntry<'_> { - self.storage.get_node(id) - } -} diff --git a/raphtory-core/src/entities/graph/tgraph_storage.rs b/raphtory-core/src/entities/graph/tgraph_storage.rs deleted file mode 100644 index 2860dfa151..0000000000 --- a/raphtory-core/src/entities/graph/tgraph_storage.rs +++ /dev/null @@ -1,88 +0,0 @@ -use crate::{ - entities::{edges::edge_store::EdgeStore, nodes::node_store::NodeStore, EID, VID}, - storage::{ - self, - raw_edges::{EdgeRGuard, EdgeWGuard, EdgesStorage, LockedEdges, UninitialisedEdge}, - EntryMut, NodeEntry, NodeSlot, NodeStorage, PairEntryMut, UninitialisedEntry, - }, -}; -use parking_lot::RwLockWriteGuard; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -pub struct GraphStorage { - // node storage with having (id, time_index, properties, adj list for each layer) - pub nodes: NodeStorage, - pub edges: EdgesStorage, -} - -impl GraphStorage { - pub fn new(num_locks: usize) -> Self { - Self { - nodes: storage::NodeStorage::new(num_locks), - edges: EdgesStorage::new(num_locks), - } - } - - pub fn num_shards(&self) -> usize { - self.nodes.data.len() - } - - #[inline] - pub fn nodes_read_lock(&self) -> storage::ReadLockedStorage { - self.nodes.read_lock() - } - - #[inline] - pub fn edges_read_lock(&self) -> LockedEdges { - self.edges.read_lock() - } - - #[inline] - pub fn nodes_len(&self) -> usize { - self.nodes.len() - } - - #[inline] - pub fn edges_len(&self) -> usize { - self.edges.len() - } - - #[inline] - pub fn push_node(&self, node: NodeStore) -> UninitialisedEntry<'_, NodeStore, NodeSlot> { - self.nodes.push(node) - } - #[inline] - pub fn push_edge(&self, edge: EdgeStore) -> UninitialisedEdge<'_> { - self.edges.push(edge) - } - - #[inline] - pub fn get_node_mut(&self, id: VID) -> EntryMut<'_, RwLockWriteGuard<'_, NodeSlot>> { - self.nodes.entry_mut(id) - } - - #[inline] - pub fn get_edge_mut(&self, eid: EID) -> EdgeWGuard<'_> { - self.edges.get_edge_mut(eid) - } - - #[inline] - pub fn get_node(&self, id: VID) -> NodeEntry<'_> { - self.nodes.entry(id) - } - - #[inline] - pub fn edge_entry(&self, eid: EID) -> EdgeRGuard<'_> { - self.edges.get_edge(eid) - } - - pub fn try_edge_entry(&self, eid: EID) -> Option<EdgeRGuard<'_>> { - self.edges.try_get_edge(eid) - } - - #[inline] - pub fn pair_node_mut(&self, i: VID, j: VID) -> PairEntryMut<'_> { - self.nodes.loop_pair_entry_mut(i, j) - } -} diff --git a/raphtory-core/src/entities/graph/timer.rs b/raphtory-core/src/entities/graph/timer.rs index 7128bee016..69edcdfdf7 100644 --- a/raphtory-core/src/entities/graph/timer.rs +++ b/raphtory-core/src/entities/graph/timer.rs @@ -36,6 +36,13 @@ impl Default for MinCounter { } } +impl From<i64> for MinCounter { + fn from(value: i64) -> Self { + let counter = AtomicI64::new(value); + Self { counter } + } +} + impl MinCounter { pub fn new() -> Self { Self { @@ -60,6 +67,13 @@ pub struct MaxCounter { counter: AtomicI64, } +impl From<i64> for MaxCounter { + fn from(value: i64) -> Self { + let counter = AtomicI64::new(value); + Self { counter } + } +} + impl Default for MaxCounter { fn default() -> Self { Self::new() diff --git a/raphtory-core/src/entities/mod.rs b/raphtory-core/src/entities/mod.rs index 0147447eaf..cd2323bd4d 100644 ---
a/raphtory-core/src/entities/mod.rs +++ b/raphtory-core/src/entities/mod.rs @@ -1,4 +1,3 @@ -pub mod edges; pub mod graph; pub mod nodes; pub mod properties; diff --git a/raphtory-core/src/entities/nodes/mod.rs b/raphtory-core/src/entities/nodes/mod.rs index 094e8f0f17..3128f25de8 100644 --- a/raphtory-core/src/entities/nodes/mod.rs +++ b/raphtory-core/src/entities/nodes/mod.rs @@ -1,3 +1,2 @@ pub mod node_ref; -pub mod node_store; pub mod structure; diff --git a/raphtory-core/src/entities/nodes/node_ref.rs b/raphtory-core/src/entities/nodes/node_ref.rs index 86730b671f..732dbe0f8a 100644 --- a/raphtory-core/src/entities/nodes/node_ref.rs +++ b/raphtory-core/src/entities/nodes/node_ref.rs @@ -21,10 +21,10 @@ pub trait AsNodeRef: Send + Sync { } } - fn as_gid_ref(&self) -> Either, VID> { + fn as_gid_ref(&self) -> Option> { match self.as_node_ref() { - NodeRef::Internal(vid) => Either::Right(vid), - NodeRef::External(u) => Either::Left(u), + NodeRef::Internal(_) => None, + NodeRef::External(u) => Some(u), } } } diff --git a/raphtory-core/src/entities/nodes/node_store.rs b/raphtory-core/src/entities/nodes/node_store.rs deleted file mode 100644 index dbfd327775..0000000000 --- a/raphtory-core/src/entities/nodes/node_store.rs +++ /dev/null @@ -1,443 +0,0 @@ -use crate::{ - entities::{ - edges::edge_ref::EdgeRef, - nodes::structure::adj::Adj, - properties::{ - props::{MetadataError, Props}, - tcell::TCell, - }, - LayerIds, EID, GID, VID, - }, - storage::{ - timeindex::{EventTime, TimeIndexWindow}, - NodeEntry, - }, - utils::iter::GenLockedIter, -}; -use itertools::Itertools; -use raphtory_api::{ - core::{ - entities::{properties::prop::Prop, GidRef, LayerVariants, ELID}, - storage::timeindex::{TimeIndexLike, TimeIndexOps}, - Direction, - }, - iter::BoxedLIter, -}; -use serde::{Deserialize, Serialize}; -use std::{iter, ops::Range}; - -#[derive(Serialize, Deserialize, Debug, Default, PartialEq)] -pub struct NodeStore { - pub global_id: GID, - pub vid: VID, - // each layer represents a separate view of the graph - pub(crate) layers: Vec, - // props for node - pub(crate) props: Option, - pub node_type: usize, - - /// For every property id keep a hash map of timestamps to values pointing to the property entries in the props vector - timestamps: NodeTimestamps, -} - -#[derive(Serialize, Deserialize, Debug, Default, PartialEq)] -pub struct NodeTimestamps { - // all the timestamps that have been seen by this node - pub edge_ts: TCell, - pub props_ts: TCell>, -} - -impl NodeTimestamps { - pub fn edge_ts(&self) -> &TCell { - &self.edge_ts - } - - pub fn props_ts(&self) -> &TCell> { - &self.props_ts - } -} - -impl<'a> TimeIndexOps<'a> for &'a NodeTimestamps { - type IndexType = EventTime; - type RangeType = TimeIndexWindow<'a, EventTime, NodeTimestamps>; - - #[inline] - fn active(&self, w: Range) -> bool { - self.edge_ts().active(w.clone()) || self.props_ts().active(w) - } - - fn range(&self, w: Range) -> Self::RangeType { - TimeIndexWindow::Range { - timeindex: *self, - range: w, - } - } - - fn first(&self) -> Option { - let first = self.edge_ts().first(); - let other = self.props_ts().first(); - - first - .zip(other) - .map(|(a, b)| a.min(b)) - .or_else(|| first.or(other)) - } - - fn last(&self) -> Option { - let last = self.edge_ts().last(); - let other = self.props_ts().last(); - - last.zip(other) - .map(|(a, b)| a.max(b)) - .or_else(|| last.or(other)) - } - - fn iter(self) -> impl Iterator + Send + Sync + 'a { - self.edge_ts - .iter() - .map(|(t, _)| *t) - .merge(self.props_ts.iter().map(|(t, _)| *t)) 
- } - - fn iter_rev(self) -> impl Iterator + Send + Sync + 'a { - self.edge_ts - .iter() - .rev() - .map(|(t, _)| *t) - .merge_by(self.props_ts.iter().rev().map(|(t, _)| *t), |lt, rt| { - lt >= rt - }) - } - - fn len(&self) -> usize { - self.edge_ts.len() + self.props_ts.len() - } -} - -impl<'a> TimeIndexLike<'a> for &'a NodeTimestamps { - fn range_iter( - self, - w: Range, - ) -> impl Iterator + Send + Sync + 'a { - self.edge_ts() - .range_iter(w.clone()) - .merge(self.props_ts().range_iter(w)) - } - - fn range_iter_rev( - self, - w: Range, - ) -> impl Iterator + Send + Sync + 'a { - self.edge_ts() - .range_iter_rev(w.clone()) - .merge_by(self.props_ts().range_iter_rev(w), |lt, rt| lt >= rt) - } - - fn range_count(&self, w: Range) -> usize { - self.edge_ts().range_count(w.clone()) + self.props_ts().range_count(w) - } - - fn first_range(&self, w: Range) -> Option { - let first = self - .edge_ts() - .iter_window(w.clone()) - .next() - .map(|(t, _)| *t); - let other = self.props_ts().iter_window(w).next().map(|(t, _)| *t); - - first - .zip(other) - .map(|(a, b)| a.min(b)) - .or_else(|| first.or(other)) - } - - fn last_range(&self, w: Range) -> Option { - let last = self - .edge_ts - .iter_window(w.clone()) - .next_back() - .map(|(t, _)| *t); - let other = self.props_ts.iter_window(w).next_back().map(|(t, _)| *t); - - last.zip(other) - .map(|(a, b)| a.max(b)) - .or_else(|| last.or(other)) - } -} - -impl NodeStore { - #[inline] - pub fn is_initialised(&self) -> bool { - self.vid != VID::default() - } - - #[inline] - pub fn init(&mut self, vid: VID, gid: GidRef) { - if !self.is_initialised() { - self.vid = vid; - self.global_id = gid.to_owned(); - } - } - - pub fn empty(global_id: GID) -> Self { - let layers = vec![Adj::Solo]; - Self { - global_id, - vid: VID(0), - timestamps: Default::default(), - layers, - props: None, - node_type: 0, - } - } - - pub fn resolved(global_id: GID, vid: VID) -> Self { - Self { - global_id, - vid, - timestamps: Default::default(), - layers: vec![], - props: None, - node_type: 0, - } - } - - pub fn global_id(&self) -> &GID { - &self.global_id - } - - pub fn timestamps(&self) -> &NodeTimestamps { - &self.timestamps - } - - #[inline] - pub fn update_time(&mut self, t: EventTime, eid: ELID) { - self.timestamps.edge_ts.set(t, eid); - } - - pub fn update_node_type(&mut self, node_type: usize) -> usize { - self.node_type = node_type; - node_type - } - - pub fn add_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - let props = self.props.get_or_insert_with(Props::new); - props.add_metadata(prop_id, prop) - } - - pub fn update_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - let props = self.props.get_or_insert_with(Props::new); - props.update_metadata(prop_id, prop) - } - - pub fn update_t_prop_time(&mut self, t: EventTime, prop_i: Option) { - self.timestamps.props_ts.set(t, prop_i); - } - - #[inline(always)] - pub fn find_edge_eid(&self, dst: VID, layer_id: &LayerIds) -> Option { - match layer_id { - LayerIds::All => match self.layers.len() { - 0 => None, - 1 => self.layers[0].get_edge(dst, Direction::OUT), - _ => self - .layers - .iter() - .find_map(|layer| layer.get_edge(dst, Direction::OUT)), - }, - LayerIds::One(layer_id) => self - .layers - .get(*layer_id) - .and_then(|layer| layer.get_edge(dst, Direction::OUT)), - LayerIds::Multiple(layers) => layers.iter().find_map(|layer_id| { - self.layers - .get(layer_id) - .and_then(|layer| layer.get_edge(dst, Direction::OUT)) - }), - LayerIds::None => None, - } - } 
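
The find_edge_eid method that closes just above is the core of edge deduplication in link_nodes: each layer keeps its own out-adjacency, a lookup over all layers returns the first hit, and there is a fast path when at most one layer exists. A simplified model of that shape (illustrative types, not raphtory's):

    use std::collections::BTreeMap;

    type Vid = usize;
    type Eid = usize;

    // One out-adjacency map per layer, as in the deleted `NodeStore::layers`.
    struct ToyLayer {
        out: BTreeMap<Vid, Eid>,
    }

    fn find_edge(layers: &[ToyLayer], dst: Vid) -> Option<Eid> {
        match layers.len() {
            0 => None,
            1 => layers[0].out.get(&dst).copied(), // single-layer fast path
            _ => layers.iter().find_map(|l| l.out.get(&dst).copied()),
        }
    }
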
- - pub fn add_edge(&mut self, v_id: VID, dir: Direction, layer: usize, edge_id: EID) { - if layer >= self.layers.len() { - self.layers.resize_with(layer + 1, || Adj::Solo); - } - - match dir { - Direction::IN => self.layers[layer].add_edge_into(v_id, edge_id), - Direction::OUT => self.layers[layer].add_edge_out(v_id, edge_id), - _ => {} - } - } - - #[inline] - pub fn edge_tuples<'a>(&'a self, layers: &LayerIds, d: Direction) -> BoxedLIter<'a, EdgeRef> { - let self_id = self.vid; - let iter: BoxedLIter<'a, EdgeRef> = match d { - Direction::OUT => self.merge_layers(layers, Direction::OUT, self_id), - Direction::IN => self.merge_layers(layers, Direction::IN, self_id), - Direction::BOTH => Box::new( - self.edge_tuples(layers, Direction::OUT) - .filter(|e| e.src() != e.dst()) - .merge_by(self.edge_tuples(layers, Direction::IN), |e1, e2| { - e1.remote() < e2.remote() - }), - ), - }; - iter - } - - fn merge_layers( - &self, - layers: &LayerIds, - d: Direction, - self_id: VID, - ) -> BoxedLIter<'_, EdgeRef> { - match layers { - LayerIds::All => Box::new( - self.layers - .iter() - .map(|adj| self.iter_adj(adj, d, self_id)) - .kmerge_by(|e1, e2| e1.remote() < e2.remote()) - .dedup(), - ), - LayerIds::One(id) => { - if let Some(layer) = self.layers.get(*id) { - Box::new(self.iter_adj(layer, d, self_id)) - } else { - Box::new(iter::empty()) - } - } - LayerIds::Multiple(ids) => Box::new( - ids.into_iter() - .filter_map(|id| self.layers.get(id)) - .map(|layer| self.iter_adj(layer, d, self_id)) - .kmerge_by(|e1, e2| e1.remote() < e2.remote()) - .dedup(), - ), - LayerIds::None => Box::new(iter::empty()), - } - } - - fn iter_adj<'a>( - &'a self, - layer: &'a Adj, - d: Direction, - self_id: VID, - ) -> impl Iterator + Send + Sync + 'a { - let iter: BoxedLIter<'a, EdgeRef> = match d { - Direction::IN => Box::new( - layer - .iter(d) - .map(move |(src_pid, e_id)| EdgeRef::new_incoming(e_id, src_pid, self_id)), - ), - Direction::OUT => Box::new( - layer - .iter(d) - .map(move |(dst_pid, e_id)| EdgeRef::new_outgoing(e_id, self_id, dst_pid)), - ), - _ => Box::new(iter::empty()), - }; - iter - } - - pub fn degree(&self, layers: &LayerIds, d: Direction) -> usize { - match layers { - LayerIds::All => match self.layers.len() { - 0 => 0, - 1 => self.layers[0].degree(d), - _ => self - .layers - .iter() - .map(|l| l.node_iter(d)) - .kmerge() - .dedup() - .count(), - }, - LayerIds::One(l) => self - .layers - .get(*l) - .map(|layer| layer.degree(d)) - .unwrap_or(0), - LayerIds::None => 0, - LayerIds::Multiple(ids) => ids - .iter() - .flat_map(|l_id| self.layers.get(l_id).map(|layer| layer.node_iter(d))) - .kmerge() - .dedup() - .count(), - } - } - - // every neighbour apears once in the iterator - // this is important because it calculates degree - pub fn neighbours<'a>( - &'a self, - layers: &LayerIds, - d: Direction, - ) -> impl Iterator + use<'a> { - match layers { - LayerIds::All => { - let iter = self - .layers - .iter() - .map(move |layer| layer.node_iter(d)) - .kmerge() - .dedup(); - LayerVariants::All(iter) - } - LayerIds::One(one) => { - let iter = self - .layers - .get(*one) - .into_iter() - .flat_map(move |layer| layer.node_iter(d)); - LayerVariants::One(iter) - } - LayerIds::Multiple(layers) => { - let iter = layers - .into_iter() - .filter_map(|l| self.layers.get(l)) - .map(move |layer| self.neighbours_from_adj(layer, d)) - .kmerge() - .dedup(); - LayerVariants::Multiple(iter) - } - LayerIds::None => LayerVariants::None(iter::empty()), - } - } - - fn neighbours_from_adj<'a>(&'a self, layer: &'a Adj, d: 
Direction) -> BoxedLIter<'a, VID> { - let iter: BoxedLIter<'a, VID> = match d { - Direction::IN => Box::new(layer.iter(d).map(|(from_v, _)| from_v)), - Direction::OUT => Box::new(layer.iter(d).map(|(to_v, _)| to_v)), - Direction::BOTH => Box::new( - self.neighbours_from_adj(layer, Direction::OUT) - .merge(self.neighbours_from_adj(layer, Direction::IN)) - .dedup(), - ), - }; - iter - } - - pub fn metadata_ids(&self) -> impl Iterator + '_ { - self.props - .as_ref() - .into_iter() - .flat_map(|ps| ps.metadata_ids()) - } - - pub fn metadata(&self, prop_id: usize) -> Option<&Prop> { - self.props.as_ref().and_then(|ps| ps.metadata(prop_id)) - } -} - -impl<'a> NodeEntry<'a> { - pub fn into_edges( - self, - layers: &LayerIds, - dir: Direction, - ) -> impl Iterator + 'a { - GenLockedIter::from(self, |node| node.as_ref().node().edge_tuples(layers, dir)) - } -} diff --git a/raphtory-core/src/entities/nodes/structure/adj.rs b/raphtory-core/src/entities/nodes/structure/adj.rs index 622b6ae939..743d1533d6 100644 --- a/raphtory-core/src/entities/nodes/structure/adj.rs +++ b/raphtory-core/src/entities/nodes/structure/adj.rs @@ -1,4 +1,5 @@ use crate::entities::{edges::edge_ref::Dir, nodes::structure::adjset::AdjSet, EID, VID}; +use either::Either; use itertools::Itertools; use raphtory_api::{ core::{Direction, DirectionVariants}, @@ -18,7 +19,7 @@ pub enum Adj { } impl Adj { - pub(crate) fn get_edge(&self, v: VID, dir: Direction) -> Option { + pub fn get_edge(&self, v: VID, dir: Direction) -> Option { match self { Adj::Solo => None, Adj::List { out, into } => match dir { @@ -45,16 +46,24 @@ impl Adj { } } - pub(crate) fn add_edge_into(&mut self, v: VID, e: EID) { + pub fn add_edge_into(&mut self, v: VID, e: EID) -> bool { match self { - Adj::Solo => *self = Self::new_into(v, e), + Adj::Solo => { + *self = Self::new_into(v, e); + true + } Adj::List { into, .. } => into.push(v, e), } } - pub(crate) fn add_edge_out(&mut self, v: VID, e: EID) { + /// Adds an edge in the out direction, creating a new adjacency if necessary. + /// Returns `true` if the edge was added, `false` if it already exists. + pub fn add_edge_out(&mut self, v: VID, e: EID) -> bool { match self { - Adj::Solo => *self = Self::new_out(v, e), + Adj::Solo => { + *self = Self::new_out(v, e); + true + } Adj::List { out, .. } => out.push(v, e), } } @@ -70,6 +79,20 @@ impl Adj { } } + pub fn out_iter(&self) -> impl Iterator + Send + Sync + '_ { + match self { + Adj::Solo => Either::Left(std::iter::empty()), + Adj::List { out, .. } => Either::Right(out.iter()), + } + } + + pub fn inb_iter(&self) -> impl Iterator + Send + Sync + '_ { + match self { + Adj::Solo => Either::Left(std::iter::empty()), + Adj::List { into, .. } => Either::Right(into.iter()), + } + } + pub fn node_iter(&self, dir: Direction) -> impl Iterator + Send + '_ { let iter = self.iter(dir).map(|(v, _)| v); match dir { diff --git a/raphtory-core/src/entities/nodes/structure/adjset.rs b/raphtory-core/src/entities/nodes/structure/adjset.rs index 692fd9eea5..1409f93529 100644 --- a/raphtory-core/src/entities/nodes/structure/adjset.rs +++ b/raphtory-core/src/entities/nodes/structure/adjset.rs @@ -48,26 +48,36 @@ impl + Copy + Send + Sync> Ad Self::One(v, e) } - pub fn push(&mut self, v: K, e: V) { + /// Push a new node and edge into the adjacency set. + /// + /// If the node already exists, it will not be added again. 
+ /// Returns `true` if the node was added, `false` if it already existed + pub fn push(&mut self, v: K, e: V) -> bool { match self { AdjSet::Empty => { *self = Self::new(v, e); + true } AdjSet::One(vv, ee) => { if *vv < v { *self = Self::Small { vs: vec![*vv, v], edges: vec![*ee, e], - } + }; + true } else if *vv > v { *self = Self::Small { vs: vec![v, *vv], edges: vec![e, *ee], - } + }; + true + } else { + // already exists + false } } AdjSet::Small { vs, edges } => match vs.binary_search(&v) { - Ok(_) => {} + Ok(_) => false, Err(i) => { if vs.len() < SMALL_SET { vs.insert(i, v); @@ -78,11 +88,10 @@ impl + Copy + Send + Sync> Ad map.insert(v, e); *self = Self::Large { vs: map } } + true } }, - AdjSet::Large { vs } => { - vs.insert(v, e); - } + AdjSet::Large { vs } => vs.insert(v, e).is_none(), } } diff --git a/raphtory-core/src/entities/properties/graph_meta.rs b/raphtory-core/src/entities/properties/graph_meta.rs index 42e89afce2..70a222a186 100644 --- a/raphtory-core/src/entities/properties/graph_meta.rs +++ b/raphtory-core/src/entities/properties/graph_meta.rs @@ -10,12 +10,16 @@ use raphtory_api::core::{ meta::PropMapper, prop::{Prop, PropError, PropType}, }, - storage::{arc_str::ArcStr, dict_mapper::MaybeNew, locked_vec::ArcReadLockedVec, FxDashMap}, + storage::{ + arc_str::ArcStr, + dict_mapper::{MaybeNew, PublicKeys}, + FxDashMap, + }, }; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::ops::{Deref, DerefMut}; -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Serialize, Debug, Default)] pub struct GraphMeta { metadata_mapper: PropMapper, temporal_mapper: PropMapper, @@ -134,20 +138,20 @@ impl GraphMeta { self.metadata_mapper.get_dtype(prop_id) } - pub fn metadata_names(&self) -> ArcReadLockedVec { - self.metadata_mapper.get_keys() + pub fn metadata_names(&self) -> PublicKeys { + self.metadata_mapper.keys() } pub fn metadata_ids(&self) -> impl Iterator { - 0..self.metadata_mapper.len() + self.metadata_mapper.ids() } - pub fn temporal_names(&self) -> ArcReadLockedVec { - self.temporal_mapper.get_keys() + pub fn temporal_names(&self) -> PublicKeys { + self.temporal_mapper.keys() } pub fn temporal_ids(&self) -> impl Iterator { - 0..self.temporal_mapper.len() + self.temporal_mapper.ids() } pub fn metadata(&self) -> impl Iterator + '_ { @@ -159,6 +163,8 @@ impl GraphMeta { pub fn temporal_props( &self, ) -> impl Iterator + '_)> + '_ { - (0..self.temporal_mapper.len()).filter_map(|id| self.temporal.get(&id).map(|v| (id, v))) + self.temporal_mapper + .ids() + .filter_map(|id| self.temporal.get(&id).map(|v| (id, v))) } } diff --git a/raphtory-core/src/entities/properties/props.rs b/raphtory-core/src/entities/properties/props.rs index 59028aecc1..63f6331afe 100644 --- a/raphtory-core/src/entities/properties/props.rs +++ b/raphtory-core/src/entities/properties/props.rs @@ -1,34 +1,27 @@ use crate::{ - entities::properties::tprop::{IllegalPropType, TProp}, - storage::{ - lazy_vec::{IllegalSet, LazyVec}, - timeindex::EventTime, - }, + entities::properties::tprop::IllegalPropType, + storage::{lazy_vec::IllegalSet, TPropColumnError}, }; use raphtory_api::core::entities::properties::prop::Prop; -use serde::{Deserialize, Serialize}; use std::fmt::Debug; use thiserror::Error; -#[derive(Serialize, Deserialize, Default, Debug, PartialEq)] -pub struct Props { - // properties - pub(crate) metadata: LazyVec>, - pub(crate) temporal_props: LazyVec, -} - #[derive(Error, Debug)] pub enum TPropError { #[error(transparent)] - IllegalSet(#[from] IllegalSet), - 
#[error(transparent)] - IllegalPropType(#[from] IllegalPropType), + ColumnError(#[from] TPropColumnError), } #[derive(Error, Debug)] pub enum MetadataError { #[error("Attempted to change value of metadata, old: {old}, new: {new}")] IllegalUpdate { old: Prop, new: Prop }, + + #[error(transparent)] + IllegalPropType(#[from] IllegalPropType), + + #[error(transparent)] + ColumnError(#[from] TPropColumnError), } impl From>> for MetadataError { @@ -39,47 +32,6 @@ impl From>> for MetadataError { } } -impl Props { - pub fn new() -> Self { - Self { - metadata: Default::default(), - temporal_props: Default::default(), - } - } - - pub fn add_prop(&mut self, t: EventTime, prop_id: usize, prop: Prop) -> Result<(), TPropError> { - self.temporal_props.update(prop_id, |p| Ok(p.set(t, prop)?)) - } - - pub fn add_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - Ok(self.metadata.set(prop_id, Some(prop))?) - } - - pub fn update_metadata(&mut self, prop_id: usize, prop: Prop) -> Result<(), MetadataError> { - self.metadata.update(prop_id, |n| { - *n = Some(prop); - Ok(()) - }) - } - - pub fn metadata(&self, prop_id: usize) -> Option<&Prop> { - let prop = self.metadata.get(prop_id)?; - prop.as_ref() - } - - pub fn temporal_prop(&self, prop_id: usize) -> Option<&TProp> { - self.temporal_props.get(prop_id) - } - - pub fn metadata_ids(&self) -> impl Iterator + '_ { - self.metadata.filled_ids() - } - - pub fn temporal_prop_ids(&self) -> impl Iterator + Send + Sync + '_ { - self.temporal_props.filled_ids() - } -} - #[cfg(test)] mod test { use super::*; diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 66f63b0596..b6d426ba23 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -1,25 +1,25 @@ use crate::{ entities::properties::tcell::TCell, - storage::{timeindex::EventTime, TPropColumn}, + storage::{timeindex::EventTime, PropColumn}, }; use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; +use either::Either; use iter_enum::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator}; -#[cfg(feature = "arrow")] -use raphtory_api::core::entities::properties::prop::PropArray; + use raphtory_api::core::{ entities::properties::{ - prop::{Prop, PropType}, + prop::{Prop, PropArray, PropType}, tprop::TPropOps, }, storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{collections::HashMap, iter, ops::Range, sync::Arc}; use thiserror::Error; -#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)] +#[derive(Debug, Default, PartialEq, Clone, Serialize)] pub enum TProp { #[default] Empty, @@ -34,10 +34,8 @@ pub enum TProp { F64(TCell), Bool(TCell), DTime(TCell>), - #[cfg(feature = "arrow")] - Array(TCell), + List(TCell), NDTime(TCell), - List(TCell>>), Map(TCell>>), Decimal(TCell), } @@ -63,7 +61,6 @@ pub enum TPropVariants< F64, Bool, DTime, - #[cfg(feature = "arrow")] Array, NDTime, List, Map, @@ -81,49 +78,78 @@ pub enum TPropVariants< F64(F64), Bool(Bool), DTime(DTime), - #[cfg(feature = "arrow")] - Array(Array), NDTime(NDTime), List(List), Map(Map), Decimal(Decimal), } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Default)] pub struct TPropCell<'a> { t_cell: Option<&'a TCell>>, - log: Option<&'a TPropColumn>, + log: Option<&'a PropColumn>, } impl<'a> TPropCell<'a> { - pub(crate) fn new(t_cell: &'a TCell>, log: Option<&'a TPropColumn>) -> Self { + 
pub fn new(t_cell: &'a TCell>, log: Option<&'a PropColumn>) -> Self { Self { t_cell: Some(t_cell), log, } } -} -impl<'a> TPropOps<'a> for TPropCell<'a> { - fn iter(self) -> impl DoubleEndedIterator + Send + Sync + 'a { - let log = self.log; + fn iter_window_inner( + self, + r: Range, + ) -> impl DoubleEndedIterator + Send + 'a { self.t_cell.into_iter().flat_map(move |t_cell| { t_cell - .iter() - .filter_map(move |(t, &id)| log?.get(id?).map(|prop| (*t, prop))) + .iter_window(r.clone()) + .filter_map(move |(t, &id)| self.log?.get(id?).map(|prop| (*t, prop))) }) } - fn iter_window( - self, - r: Range, - ) -> impl DoubleEndedIterator + Send + Sync + 'a { + fn iter_inner(self) -> impl DoubleEndedIterator + Send + 'a { self.t_cell.into_iter().flat_map(move |t_cell| { t_cell - .iter_window(r.clone()) + .iter() .filter_map(move |(t, &id)| self.log?.get(id?).map(|prop| (*t, prop))) }) } +} + +impl<'a> TPropOps<'a> for TPropCell<'a> { + fn iter_inner( + self, + range: Option>, + ) -> impl Iterator + Send + Sync + 'a { + match range { + Some(w) => { + let iter = self.iter_window_inner(w); + Either::Right(iter) + } + None => { + let iter = self.iter_inner(); + Either::Left(iter) + } + } + } + + fn iter_inner_rev( + self, + range: Option>, + ) -> impl Iterator + Send + Sync + 'a { + match range { + Some(w) => { + let iter = self.iter_window_inner(w).rev(); + Either::Right(iter) + } + None => { + let iter = self.iter_inner().rev(); + Either::Left(iter) + } + } + } fn at(&self, ti: &EventTime) -> Option { self.t_cell?.at(ti).and_then(|&id| self.log?.get(id?)) @@ -145,8 +171,6 @@ impl TProp { Prop::Bool(value) => TProp::Bool(TCell::new(t, value)), Prop::DTime(value) => TProp::DTime(TCell::new(t, value)), Prop::NDTime(value) => TProp::NDTime(TCell::new(t, value)), - #[cfg(feature = "arrow")] - Prop::Array(value) => TProp::Array(TCell::new(t, value)), Prop::List(value) => TProp::List(TCell::new(t, value)), Prop::Map(value) => TProp::Map(TCell::new(t, value)), Prop::Decimal(value) => TProp::Decimal(TCell::new(t, value)), @@ -167,8 +191,6 @@ impl TProp { TProp::F64(_) => PropType::F64, TProp::Bool(_) => PropType::Bool, TProp::DTime(_) => PropType::DTime, - #[cfg(feature = "arrow")] - TProp::Array(_) => PropType::Array(Box::new(PropType::Empty)), TProp::NDTime(_) => PropType::NDTime, TProp::List(_) => PropType::List(Box::new(PropType::Empty)), TProp::Map(_) => PropType::Map(HashMap::new().into()), @@ -219,10 +241,6 @@ impl TProp { (TProp::NDTime(cell), Prop::NDTime(a)) => { cell.set(t, a); } - #[cfg(feature = "arrow")] - (TProp::Array(cell), Prop::Array(a)) => { - cell.set(t, a); - } (TProp::List(cell), Prop::List(a)) => { cell.set(t, a); } @@ -242,98 +260,11 @@ impl TProp { } Ok(()) } -} - -impl<'a> TPropOps<'a> for &'a TProp { - fn last_before(&self, t: EventTime) -> Option<(EventTime, Prop)> { - match self { - TProp::Empty => None, - TProp::Str(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Str(v.clone()))), - TProp::I32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::I32(*v))), - TProp::I64(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::I64(*v))), - TProp::U8(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U8(*v))), - TProp::U16(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U16(*v))), - TProp::U32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U32(*v))), - TProp::U64(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U64(*v))), - TProp::F32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::F32(*v))), - TProp::F64(cell) => cell.last_before(t).map(|(t, v)| (t, 
Prop::F64(*v))), - TProp::Bool(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Bool(*v))), - TProp::DTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::DTime(*v))), - TProp::NDTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::NDTime(*v))), - #[cfg(feature = "arrow")] - TProp::Array(cell) => cell - .last_before(t) - .map(|(t, v)| (t, Prop::Array(v.clone()))), - TProp::List(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::List(v.clone()))), - TProp::Map(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Map(v.clone()))), - TProp::Decimal(cell) => cell - .last_before(t) - .map(|(t, v)| (t, Prop::Decimal(v.clone()))), - } - } - - fn iter(self) -> impl DoubleEndedIterator + Send + Sync + 'a { - match self { - TProp::Empty => TPropVariants::Empty(iter::empty()), - TProp::Str(cell) => { - TPropVariants::Str(cell.iter().map(|(t, value)| (*t, Prop::Str(value.clone())))) - } - TProp::I32(cell) => { - TPropVariants::I32(cell.iter().map(|(t, value)| (*t, Prop::I32(*value)))) - } - TProp::I64(cell) => { - TPropVariants::I64(cell.iter().map(|(t, value)| (*t, Prop::I64(*value)))) - } - TProp::U8(cell) => { - TPropVariants::U8(cell.iter().map(|(t, value)| (*t, Prop::U8(*value)))) - } - TProp::U16(cell) => { - TPropVariants::U16(cell.iter().map(|(t, value)| (*t, Prop::U16(*value)))) - } - TProp::U32(cell) => { - TPropVariants::U32(cell.iter().map(|(t, value)| (*t, Prop::U32(*value)))) - } - TProp::U64(cell) => { - TPropVariants::U64(cell.iter().map(|(t, value)| (*t, Prop::U64(*value)))) - } - TProp::F32(cell) => { - TPropVariants::F32(cell.iter().map(|(t, value)| (*t, Prop::F32(*value)))) - } - TProp::F64(cell) => { - TPropVariants::F64(cell.iter().map(|(t, value)| (*t, Prop::F64(*value)))) - } - TProp::Bool(cell) => { - TPropVariants::Bool(cell.iter().map(|(t, value)| (*t, Prop::Bool(*value)))) - } - TProp::DTime(cell) => { - TPropVariants::DTime(cell.iter().map(|(t, value)| (*t, Prop::DTime(*value)))) - } - TProp::NDTime(cell) => { - TPropVariants::NDTime(cell.iter().map(|(t, value)| (*t, Prop::NDTime(*value)))) - } - #[cfg(feature = "arrow")] - TProp::Array(cell) => TPropVariants::Array( - cell.iter() - .map(|(t, value)| (*t, Prop::Array(value.clone()))), - ), - TProp::List(cell) => TPropVariants::List( - cell.iter() - .map(|(t, value)| (*t, Prop::List(value.clone()))), - ), - TProp::Map(cell) => { - TPropVariants::Map(cell.iter().map(|(t, value)| (*t, Prop::Map(value.clone())))) - } - TProp::Decimal(cell) => TPropVariants::Decimal( - cell.iter() - .map(|(t, value)| (*t, Prop::Decimal(value.clone()))), - ), - } - } - fn iter_window( - self, + pub(crate) fn iter_window_inner( + &self, r: Range, - ) -> impl DoubleEndedIterator + Send + Sync + 'a { + ) -> impl DoubleEndedIterator + Send + Sync + '_ { match self { TProp::Empty => TPropVariants::Empty(iter::empty()), TProp::Str(cell) => TPropVariants::Str( @@ -383,11 +314,6 @@ impl<'a> TPropOps<'a> for &'a TProp { cell.iter_window(r) .map(|(t, value)| (*t, Prop::NDTime(*value))), ), - #[cfg(feature = "arrow")] - TProp::Array(cell) => TPropVariants::Array( - cell.iter_window(r) - .map(|(t, value)| (*t, Prop::Array(value.clone()))), - ), TProp::List(cell) => TPropVariants::List( cell.iter_window(r) .map(|(t, value)| (*t, Prop::List(value.clone()))), @@ -403,6 +329,86 @@ impl<'a> TPropOps<'a> for &'a TProp { } } + pub(crate) fn iter_inner( + &self, + ) -> impl DoubleEndedIterator + Send + Sync + '_ { + match self { + TProp::Empty => TPropVariants::Empty(iter::empty()), + TProp::Str(cell) => { + TPropVariants::Str(cell.iter().map(|(t, 
value)| (*t, Prop::Str(value.clone())))) + } + TProp::I32(cell) => { + TPropVariants::I32(cell.iter().map(|(t, value)| (*t, Prop::I32(*value)))) + } + TProp::I64(cell) => { + TPropVariants::I64(cell.iter().map(|(t, value)| (*t, Prop::I64(*value)))) + } + TProp::U8(cell) => { + TPropVariants::U8(cell.iter().map(|(t, value)| (*t, Prop::U8(*value)))) + } + TProp::U16(cell) => { + TPropVariants::U16(cell.iter().map(|(t, value)| (*t, Prop::U16(*value)))) + } + TProp::U32(cell) => { + TPropVariants::U32(cell.iter().map(|(t, value)| (*t, Prop::U32(*value)))) + } + TProp::U64(cell) => { + TPropVariants::U64(cell.iter().map(|(t, value)| (*t, Prop::U64(*value)))) + } + TProp::F32(cell) => { + TPropVariants::F32(cell.iter().map(|(t, value)| (*t, Prop::F32(*value)))) + } + TProp::F64(cell) => { + TPropVariants::F64(cell.iter().map(|(t, value)| (*t, Prop::F64(*value)))) + } + TProp::Bool(cell) => { + TPropVariants::Bool(cell.iter().map(|(t, value)| (*t, Prop::Bool(*value)))) + } + TProp::DTime(cell) => { + TPropVariants::DTime(cell.iter().map(|(t, value)| (*t, Prop::DTime(*value)))) + } + TProp::NDTime(cell) => { + TPropVariants::NDTime(cell.iter().map(|(t, value)| (*t, Prop::NDTime(*value)))) + } + TProp::List(cell) => TPropVariants::List( + cell.iter() + .map(|(t, value)| (*t, Prop::List(value.clone()))), + ), + TProp::Map(cell) => { + TPropVariants::Map(cell.iter().map(|(t, value)| (*t, Prop::Map(value.clone())))) + } + TProp::Decimal(cell) => TPropVariants::Decimal( + cell.iter() + .map(|(t, value)| (*t, Prop::Decimal(value.clone()))), + ), + } + } +} + +impl<'a> TPropOps<'a> for &'a TProp { + fn last_before(&self, t: EventTime) -> Option<(EventTime, Prop)> { + match self { + TProp::Empty => None, + TProp::Str(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Str(v.clone()))), + TProp::I32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::I32(*v))), + TProp::I64(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::I64(*v))), + TProp::U8(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U8(*v))), + TProp::U16(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U16(*v))), + TProp::U32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U32(*v))), + TProp::U64(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::U64(*v))), + TProp::F32(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::F32(*v))), + TProp::F64(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::F64(*v))), + TProp::Bool(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Bool(*v))), + TProp::DTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::DTime(*v))), + TProp::NDTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::NDTime(*v))), + TProp::List(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::List(v.clone()))), + TProp::Map(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Map(v.clone()))), + TProp::Decimal(cell) => cell + .last_before(t) + .map(|(t, v)| (t, Prop::Decimal(v.clone()))), + } + } + fn at(&self, ti: &EventTime) -> Option { match self { TProp::Empty => None, @@ -418,13 +424,43 @@ impl<'a> TPropOps<'a> for &'a TProp { TProp::Bool(cell) => cell.at(ti).map(|v| Prop::Bool(*v)), TProp::DTime(cell) => cell.at(ti).map(|v| Prop::DTime(*v)), TProp::NDTime(cell) => cell.at(ti).map(|v| Prop::NDTime(*v)), - #[cfg(feature = "arrow")] - TProp::Array(cell) => cell.at(ti).map(|v| Prop::Array(v.clone())), TProp::List(cell) => cell.at(ti).map(|v| Prop::List(v.clone())), TProp::Map(cell) => cell.at(ti).map(|v| Prop::Map(v.clone())), TProp::Decimal(cell) => cell.at(ti).map(|v| Prop::Decimal(v.clone())), } } + + fn 
iter_inner( + self, + range: Option<Range<EventTime>>, + ) -> impl Iterator<Item = (EventTime, Prop)> + Send + Sync + 'a { + match range { + Some(w) => { + let iter = self.iter_window_inner(w); + Either::Right(iter) + } + None => { + let iter = self.iter_inner(); + Either::Left(iter) + } + } + } + + fn iter_inner_rev( + self, + range: Option<Range<EventTime>>, + ) -> impl Iterator<Item = (EventTime, Prop)> + Send + Sync + 'a { + match range { + Some(w) => { + let iter = self.iter_window_inner(w).rev(); + Either::Right(iter) + } + None => { + let iter = self.iter_inner().rev(); + Either::Left(iter) + } + } + } } #[cfg(test)] @@ -435,7 +471,7 @@ mod tprop_tests { use super::*; #[test] fn t_prop_cell() { - let col = TPropColumn::Bool(LazyVec::from(0, true)); + let col = PropColumn::Bool(LazyVec::from(0, true)); assert_eq!(col.get(0), Some(Prop::Bool(true))); let t_prop = TPropCell::new(&TCell::TCell1(EventTime(0, 0), Some(0)), Some(&col)); diff --git a/raphtory-core/src/lib.rs b/raphtory-core/src/lib.rs index 791b0765ae..c754214f76 100644 --- a/raphtory-core/src/lib.rs +++ b/raphtory-core/src/lib.rs @@ -24,24 +24,8 @@ //! * `macOS` //! -use std::{thread, time::Duration}; - -use parking_lot::RwLock; - pub mod entities; #[cfg(feature = "python")] mod python; pub mod storage; pub mod utils; - -pub(crate) fn loop_lock_write<A>(l: &RwLock<A>) -> parking_lot::RwLockWriteGuard<'_, A> { - const MAX_BACKOFF_US: u64 = 1000; // 1ms max - let mut backoff_us = 1; - loop { - if let Some(guard) = l.try_write_for(Duration::from_micros(50)) { - return guard; - } - thread::park_timeout(Duration::from_micros(backoff_us)); - backoff_us = (backoff_us * 2).min(MAX_BACKOFF_US); - } -} diff --git a/raphtory-core/src/python/time.rs b/raphtory-core/src/python/time.rs index 4c988dcee9..3d22db4146 100644 --- a/raphtory-core/src/python/time.rs +++ b/raphtory-core/src/python/time.rs @@ -1,8 +1,9 @@ use crate::utils::time::{AlignmentUnit, Interval}; -use pyo3::{exceptions::PyTypeError, prelude::*, Bound, FromPyObject, PyAny, PyResult}; +use pyo3::{exceptions::PyTypeError, prelude::*, FromPyObject, PyAny, PyResult}; -impl<'source> FromPyObject<'source> for Interval { - fn extract_bound(interval: &Bound<'source, PyAny>) -> PyResult<Self> { +impl<'py> FromPyObject<'_, 'py> for Interval { + type Error = PyErr; + fn extract(interval: Borrowed<'_, 'py, PyAny>) -> PyResult<Self> { if let Ok(string) = interval.extract::<String>() { return Ok(string.try_into()?); }; @@ -12,17 +13,20 @@ }; Err(PyTypeError::new_err(format!( - "interval '{interval}' must be a str or an unsigned integer" + "interval '{interval:?}' must be a str or an unsigned integer" ))) } } -impl<'source> FromPyObject<'source> for AlignmentUnit { - fn extract_bound(unit: &Bound<'source, PyAny>) -> PyResult<Self> { +impl<'py> FromPyObject<'_, 'py> for AlignmentUnit { + type Error = PyErr; + fn extract(unit: Borrowed<'_, 'py, PyAny>) -> PyResult<Self> { if let Ok(string) = unit.extract::<String>() { return Ok(string.try_into()?); }; - Err(PyTypeError::new_err(format!("unit '{unit}' must be a str"))) + Err(PyTypeError::new_err(format!( + "unit '{unit:?}' must be a str" + ))) } }
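
The loop_lock_write helper deleted from lib.rs above is a pattern worth noting: rather than blocking indefinitely on a contended write lock, it polls with a short timeout and parks the thread with capped exponential backoff between attempts. Reproduced here as a self-contained sketch; the <A> type parameter is an inferred reconstruction, everything else follows the removed source:

    use parking_lot::{RwLock, RwLockWriteGuard};
    use std::{thread, time::Duration};

    // Bounded-backoff write-lock acquisition, per the removed helper.
    fn loop_lock_write<A>(l: &RwLock<A>) -> RwLockWriteGuard<'_, A> {
        const MAX_BACKOFF_US: u64 = 1000; // cap the backoff at 1ms
        let mut backoff_us = 1;
        loop {
            // try to take the write lock, giving up after 50us
            if let Some(guard) = l.try_write_for(Duration::from_micros(50)) {
                return guard;
            }
            // park instead of spinning, doubling the wait each round
            thread::park_timeout(Duration::from_micros(backoff_us));
            backoff_us = (backoff_us * 2).min(MAX_BACKOFF_US);
        }
    }
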
diff --git a/raphtory-core/src/storage/lazy_vec.rs b/raphtory-core/src/storage/lazy_vec.rs index bafe78a507..c2085f432a 100644 --- a/raphtory-core/src/storage/lazy_vec.rs +++ b/raphtory-core/src/storage/lazy_vec.rs @@ -1,6 +1,6 @@ -use raphtory_api::iter::BoxedLIter; +use arrow_array::BooleanArray; use serde::{Deserialize, Serialize}; -use std::{fmt::Debug, iter}; +use std::fmt::Debug; #[derive(thiserror::Error, Debug, PartialEq)] #[error("Cannot set previous value '{previous_value:?}' to '{new_value:?}' in position '{index}'")] @@ -167,49 +167,81 @@ impl<A> LazyVec<A> where A: PartialEq + Default + Debug + Sync + Send + Clone, { + pub fn append(&mut self, items: impl IntoIterator<Item = A>, mask: &BooleanArray) { + if !matches!(self, LazyVec::LazyVecN(_, _)) { + match self { + LazyVec::Empty => { + *self = LazyVec::LazyVecN(A::default(), MaskedCol::default()); + } + LazyVec::LazyVec1(_, tuples) => { + let mut take = TupleCol::default(); + std::mem::swap(&mut take, tuples); + *self = LazyVec::LazyVecN(A::default(), MaskedCol::from(take)); + } + _ => {} + } + } + + match self { + LazyVec::LazyVecN(_, vector) => { + for (item, is_valid) in items.into_iter().zip(mask.values().iter()) { + if is_valid { + vector.push(item); + } + } + } + _ => unreachable!(), + } + } + // fails if there is already a value set for the given id to a different value - pub(crate) fn set(&mut self, id: usize, value: A) -> Result<(), IllegalSet<A>> { + pub fn upsert(&mut self, id: usize, value: A) { match self { LazyVec::Empty => { *self = Self::from(id, value); - Ok(()) } + LazyVec::LazyVec1(_, tuples) => { + tuples.upsert(id, Some(value)); + self.swap_lazy_types(); + } + LazyVec::LazyVecN(_, vector) => { + vector.upsert(id, Some(value)); + } + } + } + + /// checks if there is already a different value for a given id + pub fn check(&self, id: usize, value: &A) -> Result<(), IllegalSet<A>> { + match self { + LazyVec::Empty => {} LazyVec::LazyVec1(_, tuples) => { if let Some(only_value) = tuples.get(id) { - if only_value != &value { - return Err(IllegalSet::new(id, only_value.clone(), value)); + if only_value != value { + return Err(IllegalSet::new(id, only_value.clone(), value.clone())); } - } else { - tuples.upsert(id, Some(value)); - - self.swap_lazy_types(); } - Ok(()) } LazyVec::LazyVecN(_, vector) => { if let Some(only_value) = vector.get(id) { - if only_value != &value { - return Err(IllegalSet::new(id, only_value.clone(), value)); + if only_value != value { + return Err(IllegalSet::new(id, only_value.clone(), value.clone())); } - } else { - vector.upsert(id, Some(value)); } - Ok(()) } } + Ok(()) } - pub(crate) fn update<B, E, F>(&mut self, id: usize, updater: F) -> Result<B, E> + pub fn update<B, E, F>(&mut self, id: usize, updater: F) -> Result<B, E> where F: FnOnce(&mut A) -> Result<B, E>, - E: From<IllegalSet<A>>, { let b = match self.get_mut(id) { Some(value) => updater(value)?, None => { let mut value = A::default(); let b = updater(&mut value)?; - self.set(id, value)?; + self.upsert(id, value); b } }; @@ -241,28 +273,9 @@ where LazyVec::LazyVec1(A::default(), TupleCol::from(inner)) } - pub(crate) fn filled_ids(&self) -> BoxedLIter<'_, usize> { + pub fn iter(&self) -> Box<dyn Iterator<Item = &A> + Send + '_> { match self { - LazyVec::Empty => Box::new(iter::empty()), - LazyVec::LazyVec1(_, tuples) => Box::new( - tuples - .iter() - .enumerate() - .filter_map(|(id, value)| value.map(|_| id)), - ), - LazyVec::LazyVecN(_, vector) => Box::new( - vector - .iter() - .enumerate() - .filter_map(|(id, value)| value.map(|_| id)), - ), - } - } - - #[cfg(test)] - fn iter(&self) -> Box<dyn Iterator<Item = &A> + Send + '_> { - match self { - LazyVec::Empty => Box::new(iter::empty()), + LazyVec::Empty => Box::new(std::iter::empty()), LazyVec::LazyVec1(default, tuples) => { Box::new(tuples.iter().map(|value| value.unwrap_or(default))) } @@ -272,16 +285,15 @@ where } } - #[cfg(test)] - fn iter_opt(&self) -> Box<dyn Iterator<Item = Option<&A>> + Send + '_> { + pub fn iter_opt(&self) -> Box<dyn Iterator<Item = Option<&A>> + Send + '_> { match self { - LazyVec::Empty => Box::new(iter::empty()), + LazyVec::Empty => Box::new(std::iter::empty()), LazyVec::LazyVec1(_, tuples) =>
Box::new(tuples.iter()), LazyVec::LazyVecN(_, vector) => Box::new(vector.iter()), } } - pub(crate) fn get(&self, id: usize) -> Option<&A> { + pub fn get(&self, id: usize) -> Option<&A> { match self { LazyVec::LazyVec1(default, tuples) => tuples .get(id) @@ -293,7 +305,7 @@ where } } - pub(crate) fn get_opt(&self, id: usize) -> Option<&A> { + pub fn get_opt(&self, id: usize) -> Option<&A> { match self { LazyVec::LazyVec1(_, tuples) => tuples.get(id), LazyVec::LazyVecN(_, vec) => vec.get(id), @@ -341,7 +353,6 @@ where #[cfg(test)] mod lazy_vec_tests { use super::*; - use itertools::Itertools; use proptest::{arbitrary::Arbitrary, proptest}; fn check_lazy_vec(lazy_vec: &LazyVec, v: Vec>) { @@ -404,9 +415,9 @@ mod lazy_vec_tests { fn normal_operation() { let mut vec = LazyVec::::Empty; - vec.set(5, 55).unwrap(); - vec.set(1, 11).unwrap(); - vec.set(8, 88).unwrap(); + vec.upsert(5, 55); + vec.upsert(1, 11); + vec.upsert(8, 88); assert_eq!(vec.get(5), Some(&55)); assert_eq!(vec.get(1), Some(&11)); assert_eq!(vec.get(0), Some(&0)); @@ -431,14 +442,12 @@ mod lazy_vec_tests { }) .unwrap(); assert_eq!(vec.get(9), Some(&1)); - - assert_eq!(vec.filled_ids().collect_vec(), vec![1, 5, 6, 8, 9]); } #[test] - fn set_fails_if_present() { - let mut vec = LazyVec::from(5, 55); - let result = vec.set(5, 555); + fn check_fails_if_present() { + let vec = LazyVec::from(5, 55); + let result = vec.check(5, &555); assert_eq!(result, Err(IllegalSet::new(5, 55, 555))) } } diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 5390f90170..96107238cc 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -1,124 +1,95 @@ use crate::{ - entities::{ - nodes::node_store::NodeStore, - properties::{props::TPropError, tprop::IllegalPropType}, - }, - loop_lock_write, + entities::properties::{props::TPropError, tprop::IllegalPropType}, storage::lazy_vec::IllegalSet, }; -use bigdecimal::BigDecimal; -use itertools::Itertools; +use arrow_schema::ArrowError; +use bigdecimal::{num_bigint::BigInt, BigDecimal}; use lazy_vec::LazyVec; -use lock_api; -use node_entry::NodePtr; -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -#[cfg(feature = "arrow")] -use raphtory_api::core::entities::properties::prop::PropArray; use raphtory_api::core::{ - entities::{ - properties::prop::{Prop, PropType}, - GidRef, VID, - }, + entities::properties::prop::{prop_col::PropCol, AsPropRef, Prop, PropRef, PropType}, storage::arc_str::ArcStr, }; -use rayon::prelude::*; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; -use std::{ - collections::HashMap, - fmt::{Debug, Formatter}, - marker::PhantomData, - ops::{Deref, DerefMut, Index, IndexMut}, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, -}; +use std::{borrow::Cow, collections::HashMap, fmt::Debug, sync::Arc}; use thiserror::Error; +use crate::storage::string_col::StringCol; +use raphtory_api::core::entities::properties::prop::{ + IntoProp, PropArray, PropMapRef, PropNum, PropUnwrap, +}; + pub mod lazy_vec; pub mod locked_view; -pub mod node_entry; -pub mod raw_edges; +mod string_col; pub mod timeindex; -type ArcRwLockReadGuard = lock_api::ArcRwLockReadGuard; -#[must_use] -pub struct UninitialisedEntry<'a, T, TS> { - offset: usize, - guard: RwLockWriteGuard<'a, TS>, - value: T, -} - -impl<'a, T: Default, TS: DerefMut>> UninitialisedEntry<'a, T, TS> { - pub fn init(mut self) { - if self.offset >= self.guard.len() { - self.guard.resize_with(self.offset + 1, Default::default); - } - self.guard[self.offset] = 
self.value; - } - pub fn value(&self) -> &T { - &self.value - } -} - -#[inline] -fn resolve(index: usize, num_buckets: usize) -> (usize, usize) { - let bucket = index % num_buckets; - let offset = index / num_buckets; - (bucket, offset) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct NodeVec { - data: Arc>, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Default)] -pub struct NodeSlot { - nodes: Vec, - t_props_log: TColumns, // not the same size as nodes -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Default)] +#[derive(Debug, Default)] pub struct TColumns { - t_props_log: Vec, + t_props_log: Vec, num_rows: usize, } impl TColumns { - pub fn push( + pub fn push( &mut self, - row: impl IntoIterator, + props: impl IntoIterator, ) -> Result, TPropError> { let id = self.num_rows; let mut has_props = false; - for (prop_id, prop) in row { + for (prop_id, prop) in props { match self.t_props_log.get_mut(prop_id) { - Some(col) => col.push(prop)?, + Some(col) => col.push(prop.as_prop_ref())?, None => { - let col: TPropColumn = TPropColumn::new(self.num_rows, prop); + let col = PropColumn::new(self.num_rows, prop.as_prop_ref()); + self.t_props_log - .resize_with(prop_id + 1, || TPropColumn::Empty(id)); + .resize_with(prop_id + 1, || PropColumn::Empty(id)); self.t_props_log[prop_id] = col; } } + has_props = true; } if has_props { self.num_rows += 1; + for col in self.t_props_log.iter_mut() { col.grow(self.num_rows); } + Ok(Some(id)) } else { Ok(None) } } - pub(crate) fn get(&self, prop_id: usize) -> Option<&TPropColumn> { + pub fn ensure_column(&mut self, prop_id: usize) { + if self.t_props_log.len() <= prop_id { + self.t_props_log + .resize_with(prop_id + 1, || PropColumn::Empty(self.num_rows)); + } + } + + pub fn push_null(&mut self) -> usize { + let id = self.num_rows; + for col in self.t_props_log.iter_mut() { + col.push_null(); + } + self.num_rows += 1; + id + } + + pub fn get(&self, prop_id: usize) -> Option<&PropColumn> { + self.t_props_log.get(prop_id) + } + + pub fn get_mut(&mut self, prop_id: usize) -> Option<&mut PropColumn> { + self.t_props_log.get_mut(prop_id) + } + + pub fn getx(&self, prop_id: usize) -> Option<&PropColumn> { self.t_props_log.get(prop_id) } @@ -130,13 +101,29 @@ impl TColumns { self.num_rows == 0 } - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator { self.t_props_log.iter() } + + pub fn num_columns(&self) -> usize { + self.t_props_log.len() + } + + pub fn reset_len(&mut self) { + self.num_rows = self + .t_props_log + .iter() + .map(|col| col.len()) + .max() + .unwrap_or(0); + self.t_props_log + .iter_mut() + .for_each(|col| col.grow(self.num_rows)); + } } -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub enum TPropColumn { +#[derive(Debug)] +pub enum PropColumn { Empty(usize), Bool(LazyVec), U8(LazyVec), @@ -147,10 +134,8 @@ pub enum TPropColumn { I64(LazyVec), F32(LazyVec), F64(LazyVec), - Str(LazyVec), - #[cfg(feature = "arrow")] - Array(LazyVec), - List(LazyVec>>), + Str(StringCol), + List(LazyVec), Map(LazyVec>>), NDTime(LazyVec), DTime(LazyVec>), @@ -160,75 +145,56 @@ pub enum TPropColumn { #[derive(Error, Debug)] pub enum TPropColumnError { #[error(transparent)] - IllegalSetBool(#[from] IllegalSet), - #[error(transparent)] - IllegalSetU8(#[from] IllegalSet), + IllegalSet(IllegalSet), #[error(transparent)] - IllegalSetU16(#[from] IllegalSet), + IllegalType(#[from] IllegalPropType), #[error(transparent)] - IllegalSetU32(#[from] IllegalSet), - #[error(transparent)] - IllegalSetU64(#[from] 
IllegalSet), - #[error(transparent)] - IllegalSetI32(#[from] IllegalSet), - #[error(transparent)] - IllegalSetI64(#[from] IllegalSet), - #[error(transparent)] - IllegalSetF32(#[from] IllegalSet), - #[error(transparent)] - IllegalSetF64(#[from] IllegalSet), - #[error(transparent)] - IllegalSetStr(#[from] IllegalSet), - #[cfg(feature = "arrow")] - #[error(transparent)] - IllegalSetArray(#[from] IllegalSet), - #[error(transparent)] - IllegalSetList(#[from] IllegalSet>>), - #[error(transparent)] - IllegalSetMap(#[from] IllegalSet>>), - #[error(transparent)] - IllegalSetNDTime(#[from] IllegalSet), - #[error(transparent)] - IllegalSetDTime(#[from] IllegalSet>), - #[error(transparent)] - Decimal(#[from] IllegalSet), - #[error(transparent)] - IllegalPropType(#[from] IllegalPropType), + Arrow(#[from] ArrowError), +} + +impl + Debug> From> for TPropColumnError { + fn from(value: IllegalSet) -> Self { + let previous_value = value.previous_value.into(); + let new_value = value.new_value.into(); + TPropColumnError::IllegalSet(IllegalSet { + index: value.index, + previous_value, + new_value, + }) + } } -impl Default for TPropColumn { +impl Default for PropColumn { fn default() -> Self { - TPropColumn::Empty(0) + PropColumn::Empty(0) } } -impl TPropColumn { - pub(crate) fn new(idx: usize, prop: Prop) -> Self { - let mut col = TPropColumn::default(); - col.set(idx, prop).unwrap(); +impl PropColumn { + pub(crate) fn new(idx: usize, prop: PropRef<'_>) -> Self { + let mut col = PropColumn::default(); + col.upsert(idx, prop).unwrap(); col } pub(crate) fn dtype(&self) -> PropType { match self { - TPropColumn::Empty(_) => PropType::Empty, - TPropColumn::Bool(_) => PropType::Bool, - TPropColumn::U8(_) => PropType::U8, - TPropColumn::U16(_) => PropType::U16, - TPropColumn::U32(_) => PropType::U32, - TPropColumn::U64(_) => PropType::U64, - TPropColumn::I32(_) => PropType::I32, - TPropColumn::I64(_) => PropType::I64, - TPropColumn::F32(_) => PropType::F32, - TPropColumn::F64(_) => PropType::F64, - TPropColumn::Str(_) => PropType::Str, - #[cfg(feature = "arrow")] - TPropColumn::Array(_) => PropType::Array(Box::new(PropType::Empty)), - TPropColumn::List(_) => PropType::List(Box::new(PropType::Empty)), - TPropColumn::Map(_) => PropType::Map(HashMap::new().into()), - TPropColumn::NDTime(_) => PropType::NDTime, - TPropColumn::DTime(_) => PropType::DTime, - TPropColumn::Decimal(_) => PropType::Decimal { scale: 0 }, + PropColumn::Empty(_) => PropType::Empty, + PropColumn::Bool(_) => PropType::Bool, + PropColumn::U8(_) => PropType::U8, + PropColumn::U16(_) => PropType::U16, + PropColumn::U32(_) => PropType::U32, + PropColumn::U64(_) => PropType::U64, + PropColumn::I32(_) => PropType::I32, + PropColumn::I64(_) => PropType::I64, + PropColumn::F32(_) => PropType::F32, + PropColumn::F64(_) => PropType::F64, + PropColumn::Str(_) => PropType::Str, + PropColumn::List(_) => PropType::List(Box::new(PropType::Empty)), + PropColumn::Map(_) => PropType::Map(HashMap::new().into()), + PropColumn::NDTime(_) => PropType::NDTime, + PropColumn::DTime(_) => PropType::DTime, + PropColumn::Decimal(_) => PropType::Decimal { scale: 0 }, } } @@ -238,761 +204,249 @@ impl TPropColumn { } } - pub(crate) fn set(&mut self, index: usize, prop: Prop) -> Result<(), TPropColumnError> { + pub fn upsert(&mut self, index: usize, prop: PropRef<'_>) -> Result<(), TPropColumnError> { self.init_empty_col(&prop); match (self, prop) { - (TPropColumn::Bool(col), Prop::Bool(v)) => col.set(index, v)?, - (TPropColumn::I64(col), Prop::I64(v)) => col.set(index, v)?, - 
(TPropColumn::U32(col), Prop::U32(v)) => col.set(index, v)?, - (TPropColumn::U64(col), Prop::U64(v)) => col.set(index, v)?, - (TPropColumn::F32(col), Prop::F32(v)) => col.set(index, v)?, - (TPropColumn::F64(col), Prop::F64(v)) => col.set(index, v)?, - (TPropColumn::Str(col), Prop::Str(v)) => col.set(index, v)?, - #[cfg(feature = "arrow")] - (TPropColumn::Array(col), Prop::Array(v)) => col.set(index, v)?, - (TPropColumn::U8(col), Prop::U8(v)) => col.set(index, v)?, - (TPropColumn::U16(col), Prop::U16(v)) => col.set(index, v)?, - (TPropColumn::I32(col), Prop::I32(v)) => col.set(index, v)?, - (TPropColumn::List(col), Prop::List(v)) => col.set(index, v)?, - (TPropColumn::Map(col), Prop::Map(v)) => col.set(index, v)?, - (TPropColumn::NDTime(col), Prop::NDTime(v)) => col.set(index, v)?, - (TPropColumn::DTime(col), Prop::DTime(v)) => col.set(index, v)?, - (TPropColumn::Decimal(col), Prop::Decimal(v)) => col.set(index, v)?, + (PropColumn::Bool(col), PropRef::Bool(v)) => col.upsert(index, v), + (PropColumn::I64(col), PropRef::Num(PropNum::I64(v))) => col.upsert(index, v), + (PropColumn::U32(col), PropRef::Num(PropNum::U32(v))) => col.upsert(index, v), + (PropColumn::U64(col), PropRef::Num(PropNum::U64(v))) => col.upsert(index, v), + (PropColumn::F32(col), PropRef::Num(PropNum::F32(v))) => col.upsert(index, v), + (PropColumn::F64(col), PropRef::Num(PropNum::F64(v))) => col.upsert(index, v), + (PropColumn::Str(col), PropRef::Str(v)) => col.upsert(index, v)?, + (PropColumn::U8(col), PropRef::Num(PropNum::U8(v))) => col.upsert(index, v), + (PropColumn::U16(col), PropRef::Num(PropNum::U16(v))) => col.upsert(index, v), + (PropColumn::I32(col), PropRef::Num(PropNum::I32(v))) => col.upsert(index, v), + (PropColumn::List(col), PropRef::List(v)) => col.upsert(index, v.into_owned()), + (PropColumn::Map(col), PropRef::Map(v)) => match v { + PropMapRef::Mem(map) => col.upsert(index, map.clone()), + PropMapRef::PropCol { map, i } => { + if let Some(entry) = map.get(i).and_then(|prop| prop.into_map()) { + col.upsert(index, entry); + } + } + PropMapRef::Arrow(arc_map) => { + if let Some(prop) = arc_map.into_prop() { + if let Some(map_ref) = prop.as_prop_ref().as_map_ref() { + if let Some(map) = map_ref.as_map() { + col.upsert(index, map.clone()); + } + } + } + } + }, + (PropColumn::NDTime(col), PropRef::NDTime(v)) => col.upsert(index, v), + (PropColumn::DTime(col), PropRef::DTime(v)) => col.upsert(index, v), + (PropColumn::Decimal(col), PropRef::Decimal { num, scale }) => { + col.upsert(index, BigDecimal::from_bigint(num.into(), scale as i64)) + } (col, prop) => { Err(IllegalPropType { expected: col.dtype(), - actual: prop.dtype(), + actual: prop.into_prop().dtype(), })?; } } Ok(()) } - pub(crate) fn push(&mut self, prop: Prop) -> Result<(), IllegalPropType> { - self.init_empty_col(&prop); + pub fn check(&self, index: usize, prop: &PropRef<'_>) -> Result<(), TPropColumnError> { match (self, prop) { - (TPropColumn::Bool(col), Prop::Bool(v)) => col.push(Some(v)), - (TPropColumn::U8(col), Prop::U8(v)) => col.push(Some(v)), - (TPropColumn::I64(col), Prop::I64(v)) => col.push(Some(v)), - (TPropColumn::U32(col), Prop::U32(v)) => col.push(Some(v)), - (TPropColumn::U64(col), Prop::U64(v)) => col.push(Some(v)), - (TPropColumn::F32(col), Prop::F32(v)) => col.push(Some(v)), - (TPropColumn::F64(col), Prop::F64(v)) => col.push(Some(v)), - (TPropColumn::Str(col), Prop::Str(v)) => col.push(Some(v)), - #[cfg(feature = "arrow")] - (TPropColumn::Array(col), Prop::Array(v)) => col.push(Some(v)), - (TPropColumn::U16(col), 
Prop::U16(v)) => col.push(Some(v)), - (TPropColumn::I32(col), Prop::I32(v)) => col.push(Some(v)), - (TPropColumn::List(col), Prop::List(v)) => col.push(Some(v)), - (TPropColumn::Map(col), Prop::Map(v)) => col.push(Some(v)), - (TPropColumn::NDTime(col), Prop::NDTime(v)) => col.push(Some(v)), - (TPropColumn::DTime(col), Prop::DTime(v)) => col.push(Some(v)), - (TPropColumn::Decimal(col), Prop::Decimal(v)) => col.push(Some(v)), + (PropColumn::Empty(_), _) => {} + (PropColumn::Bool(col), PropRef::Bool(v)) => col.check(index, v)?, + (PropColumn::I64(col), PropRef::Num(PropNum::I64(v))) => col.check(index, v)?, + (PropColumn::U32(col), PropRef::Num(PropNum::U32(v))) => col.check(index, v)?, + (PropColumn::U64(col), PropRef::Num(PropNum::U64(v))) => col.check(index, v)?, + (PropColumn::F32(col), PropRef::Num(PropNum::F32(v))) => col.check(index, v)?, + (PropColumn::F64(col), PropRef::Num(PropNum::F64(v))) => col.check(index, v)?, + (PropColumn::Str(col), PropRef::Str(v)) => col.check(index, v)?, + (PropColumn::U8(col), PropRef::Num(PropNum::U8(v))) => col.check(index, v)?, + (PropColumn::U16(col), PropRef::Num(PropNum::U16(v))) => col.check(index, v)?, + (PropColumn::I32(col), PropRef::Num(PropNum::I32(v))) => col.check(index, v)?, + (PropColumn::List(col), PropRef::List(v)) => col.check(index, v)?, + (PropColumn::Map(col), PropRef::Map(v)) => col.check(index, &v.as_mem())?, + (PropColumn::NDTime(col), PropRef::NDTime(v)) => col.check(index, v)?, + (PropColumn::DTime(col), PropRef::DTime(v)) => col.check(index, v)?, + (PropColumn::Decimal(col), PropRef::Decimal { num, scale }) => col.check( + index, + &BigDecimal::from_bigint(BigInt::from(*num), *scale as i64), + )?, (col, prop) => { - return Err(IllegalPropType { + Err(IllegalPropType { expected: col.dtype(), - actual: prop.dtype(), - }) + actual: prop.clone().into_prop().dtype(), + })?; } } Ok(()) } - fn init_empty_col(&mut self, prop: &Prop) { - if let TPropColumn::Empty(len) = self { - match prop { - Prop::Bool(_) => *self = TPropColumn::Bool(LazyVec::with_len(*len)), - Prop::I64(_) => *self = TPropColumn::I64(LazyVec::with_len(*len)), - Prop::U32(_) => *self = TPropColumn::U32(LazyVec::with_len(*len)), - Prop::U64(_) => *self = TPropColumn::U64(LazyVec::with_len(*len)), - Prop::F32(_) => *self = TPropColumn::F32(LazyVec::with_len(*len)), - Prop::F64(_) => *self = TPropColumn::F64(LazyVec::with_len(*len)), - Prop::Str(_) => *self = TPropColumn::Str(LazyVec::with_len(*len)), - #[cfg(feature = "arrow")] - Prop::Array(_) => *self = TPropColumn::Array(LazyVec::with_len(*len)), - Prop::U8(_) => *self = TPropColumn::U8(LazyVec::with_len(*len)), - Prop::U16(_) => *self = TPropColumn::U16(LazyVec::with_len(*len)), - Prop::I32(_) => *self = TPropColumn::I32(LazyVec::with_len(*len)), - Prop::List(_) => *self = TPropColumn::List(LazyVec::with_len(*len)), - Prop::Map(_) => *self = TPropColumn::Map(LazyVec::with_len(*len)), - Prop::NDTime(_) => *self = TPropColumn::NDTime(LazyVec::with_len(*len)), - Prop::DTime(_) => *self = TPropColumn::DTime(LazyVec::with_len(*len)), - Prop::Decimal(_) => *self = TPropColumn::Decimal(LazyVec::with_len(*len)), - } - } - } - - fn is_empty(&self) -> bool { - matches!(self, TPropColumn::Empty(_)) - } - - pub(crate) fn push_null(&mut self) { - match self { - TPropColumn::Bool(col) => col.push(None), - TPropColumn::I64(col) => col.push(None), - TPropColumn::U32(col) => col.push(None), - TPropColumn::U64(col) => col.push(None), - TPropColumn::F32(col) => col.push(None), - TPropColumn::F64(col) => col.push(None), - 
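
The new `check` method above mirrors the `upsert` arms case for case but never mutates: it only verifies that writing `prop` at `index` would be legal, meaning the right type and not a conflicting overwrite of an existing value. A hedged sketch of that validate-before-write idea over a plain vector; `IllegalSet` here is a simplified stand-in for the crate's error type.

#[derive(Debug)]
struct IllegalSet<T> {
    index: usize,
    previous_value: T,
    new_value: T,
}

fn check<T: PartialEq + Clone>(
    col: &[Option<T>],
    index: usize,
    new_value: &T,
) -> Result<(), IllegalSet<T>> {
    // Only an existing, *different* value makes the write illegal.
    if let Some(Some(previous)) = col.get(index) {
        if previous != new_value {
            return Err(IllegalSet {
                index,
                previous_value: previous.clone(),
                new_value: new_value.clone(),
            });
        }
    }
    Ok(())
}

fn main() {
    let col = vec![Some(1i64), None];
    assert!(check(&col, 0, &1).is_ok());  // idempotent re-set is fine
    assert!(check(&col, 1, &5).is_ok());  // empty slot is fine
    assert!(check(&col, 0, &2).is_err()); // conflicting overwrite is rejected
}
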
TPropColumn::Str(col) => col.push(None), - #[cfg(feature = "arrow")] - TPropColumn::Array(col) => col.push(None), - TPropColumn::U8(col) => col.push(None), - TPropColumn::U16(col) => col.push(None), - TPropColumn::I32(col) => col.push(None), - TPropColumn::List(col) => col.push(None), - TPropColumn::Map(col) => col.push(None), - TPropColumn::NDTime(col) => col.push(None), - TPropColumn::DTime(col) => col.push(None), - TPropColumn::Decimal(col) => col.push(None), - TPropColumn::Empty(count) => { - *count += 1; - } - } - } - - pub fn get(&self, index: usize) -> Option { - match self { - TPropColumn::Bool(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::I64(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::U32(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::U64(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::F32(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::F64(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::Str(col) => col.get_opt(index).map(|prop| prop.into()), - #[cfg(feature = "arrow")] - TPropColumn::Array(col) => col.get_opt(index).map(|prop| Prop::Array(prop.clone())), - TPropColumn::U8(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::U16(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::I32(col) => col.get_opt(index).map(|prop| (*prop).into()), - TPropColumn::List(col) => col.get_opt(index).map(|prop| Prop::List(prop.clone())), - TPropColumn::Map(col) => col.get_opt(index).map(|prop| Prop::Map(prop.clone())), - TPropColumn::NDTime(col) => col.get_opt(index).map(|prop| Prop::NDTime(*prop)), - TPropColumn::DTime(col) => col.get_opt(index).map(|prop| Prop::DTime(*prop)), - TPropColumn::Decimal(col) => col.get_opt(index).map(|prop| Prop::Decimal(prop.clone())), - TPropColumn::Empty(_) => None, - } - } - - pub(crate) fn len(&self) -> usize { - match self { - TPropColumn::Bool(col) => col.len(), - TPropColumn::I64(col) => col.len(), - TPropColumn::U32(col) => col.len(), - TPropColumn::U64(col) => col.len(), - TPropColumn::F32(col) => col.len(), - TPropColumn::F64(col) => col.len(), - TPropColumn::Str(col) => col.len(), - #[cfg(feature = "arrow")] - TPropColumn::Array(col) => col.len(), - TPropColumn::U8(col) => col.len(), - TPropColumn::U16(col) => col.len(), - TPropColumn::I32(col) => col.len(), - TPropColumn::List(col) => col.len(), - TPropColumn::Map(col) => col.len(), - TPropColumn::NDTime(col) => col.len(), - TPropColumn::DTime(col) => col.len(), - TPropColumn::Decimal(col) => col.len(), - TPropColumn::Empty(count) => *count, - } - } -} - -impl NodeSlot { - pub fn t_props_log(&self) -> &TColumns { - &self.t_props_log - } - - pub fn t_props_log_mut(&mut self) -> &mut TColumns { - &mut self.t_props_log - } - - pub fn iter(&self) -> impl Iterator> { - self.nodes - .iter() - .filter(|v| v.is_initialised()) - .map(|ns| NodePtr::new(ns, &self.t_props_log)) - } - - pub fn par_iter(&self) -> impl ParallelIterator> { - self.nodes - .par_iter() - .filter(|v| v.is_initialised()) - .map(|ns| NodePtr::new(ns, &self.t_props_log)) - } -} - -impl Index for NodeSlot { - type Output = NodeStore; - - fn index(&self, index: usize) -> &Self::Output { - &self.nodes[index] - } -} - -impl IndexMut for NodeSlot { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut self.nodes[index] - } -} - -impl Deref for NodeSlot { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.nodes - } -} - -impl DerefMut for 
NodeSlot { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.nodes - } -} - -impl PartialEq for NodeVec { - fn eq(&self, other: &Self) -> bool { - let a = self.data.read_recursive(); - let b = other.data.read_recursive(); - a.deref() == b.deref() - } -} - -impl Default for NodeVec { - fn default() -> Self { - Self::new() - } -} - -impl NodeVec { - pub fn new() -> Self { - Self { - data: Arc::new(RwLock::new(Default::default())), - } - } - - #[inline] - pub fn read_arc_lock(&self) -> ArcRwLockReadGuard { - RwLock::read_arc_recursive(&self.data) - } - - #[inline] - pub fn write(&self) -> impl DerefMut + '_ { - loop_lock_write(&self.data) - } - - #[inline] - pub fn read(&self) -> impl Deref + '_ { - self.data.read_recursive() - } -} - -#[derive(Serialize, Deserialize)] -pub struct NodeStorage { - pub(crate) data: Box<[NodeVec]>, - len: AtomicUsize, -} - -impl Debug for NodeStorage { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("NodeStorage") - .field("len", &self.len()) - .field("data", &self.read_lock().iter().collect_vec()) - .finish() - } -} - -impl PartialEq for NodeStorage { - fn eq(&self, other: &Self) -> bool { - self.data.eq(&other.data) - } -} - -#[derive(Debug)] -pub struct ReadLockedStorage { - pub(crate) locks: Vec>>, - len: usize, -} - -impl ReadLockedStorage { - fn resolve(&self, index: VID) -> (usize, usize) { - let index: usize = index.into(); - let n = self.locks.len(); - let bucket = index % n; - let offset = index / n; - (bucket, offset) - } - - pub fn len(&self) -> usize { - self.len - } - - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[cfg(test)] - pub fn get(&self, index: VID) -> &NodeStore { - let (bucket, offset) = self.resolve(index); - let bucket = &self.locks[bucket]; - &bucket[offset] - } - - #[inline] - pub fn get_entry(&self, index: VID) -> NodePtr<'_> { - let (bucket, offset) = self.resolve(index); - let bucket = &self.locks[bucket]; - NodePtr::new(&bucket[offset], &bucket.t_props_log) - } - - #[inline] - pub fn try_get_entry(&self, index: VID) -> Option> { - let (bucket, offset) = self.resolve(index); - let bucket = self.locks.get(bucket)?; - let node = bucket.get(offset)?; - if node.is_initialised() { - Some(NodePtr::new(node, &bucket.t_props_log)) - } else { - None - } - } - - pub fn iter(&self) -> impl Iterator> + '_ { - self.locks.iter().flat_map(|v| v.iter()) - } - - pub fn par_iter(&self) -> impl ParallelIterator> + '_ { - self.locks.par_iter().flat_map(|v| v.par_iter()) - } -} - -impl NodeStorage { - pub fn count_with_filter) -> bool + Send + Sync>(&self, f: F) -> usize { - self.read_lock().par_iter().filter(|x| f(*x)).count() - } -} - -impl NodeStorage { - #[inline] - fn resolve(&self, index: usize) -> (usize, usize) { - resolve(index, self.data.len()) - } - - #[inline] - pub fn read_lock(&self) -> ReadLockedStorage { - let guards = self - .data - .iter() - .map(|v| Arc::new(v.read_arc_lock())) - .collect(); - ReadLockedStorage { - locks: guards, - len: self.len(), - } - } - - pub fn write_lock(&self) -> WriteLockedNodes<'_> { - WriteLockedNodes { - guards: self.data.iter().map(|lock| lock.data.write()).collect(), - global_len: &self.len, - } - } - - pub fn new(n_locks: usize) -> Self { - let data: Box<[NodeVec]> = (0..n_locks) - .map(|_| NodeVec::new()) - .collect::>() - .into(); - - Self { - data, - len: AtomicUsize::new(0), - } - } - - pub fn push(&self, mut value: NodeStore) -> UninitialisedEntry<'_, NodeStore, NodeSlot> { - let index = self.len.fetch_add(1, Ordering::Relaxed); - value.vid = 
VID(index); - let (bucket, offset) = self.resolve(index); - let guard = loop_lock_write(&self.data[bucket].data); - UninitialisedEntry { - offset, - guard, - value, - } - } - - pub fn set(&self, value: NodeStore) { - let VID(index) = value.vid; - self.len.fetch_max(index + 1, Ordering::Relaxed); - let (bucket, offset) = self.resolve(index); - let mut guard = loop_lock_write(&self.data[bucket].data); - if guard.len() <= offset { - guard.resize_with(offset + 1, NodeStore::default) - } - guard[offset] = value - } - - #[inline] - pub fn entry(&self, index: VID) -> NodeEntry<'_> { - let index = index.into(); - let (bucket, offset) = self.resolve(index); - let guard = self.data[bucket].data.read_recursive(); - NodeEntry { offset, guard } - } - - /// Get the node if it is initialised - pub fn try_entry(&self, index: VID) -> Option> { - let (bucket, offset) = self.resolve(index.index()); - let guard = self.data.get(bucket)?.data.read_recursive(); - if guard.get(offset)?.is_initialised() { - Some(NodeEntry { offset, guard }) - } else { - None - } - } - - pub fn entry_mut(&self, index: VID) -> EntryMut<'_, RwLockWriteGuard<'_, NodeSlot>> { - let index = index.into(); - let (bucket, offset) = self.resolve(index); - let guard = loop_lock_write(&self.data[bucket].data); - EntryMut { - i: offset, - guard, - _pd: PhantomData, - } - } - - pub fn prop_entry_mut(&self, index: VID) -> impl DerefMut + '_ { - let index = index.into(); - let (bucket, _) = self.resolve(index); - let lock = loop_lock_write(&self.data[bucket].data); - RwLockWriteGuard::map(lock, |data| &mut data.t_props_log) - } - - // This helps get the right locks when adding an edge - #[deprecated(note = "use loop_pair_entry_mut instead")] - pub fn pair_entry_mut(&self, i: VID, j: VID) -> PairEntryMut<'_> { - let i = i.into(); - let j = j.into(); - let (bucket_i, offset_i) = self.resolve(i); - let (bucket_j, offset_j) = self.resolve(j); - // always acquire lock for smaller bucket first to avoid deadlock between two updates for the same pair of buckets - if bucket_i < bucket_j { - let guard_i = self.data[bucket_i].data.write(); - let guard_j = self.data[bucket_j].data.write(); - PairEntryMut::Different { - i: offset_i, - j: offset_j, - guard1: guard_i, - guard2: guard_j, + pub(crate) fn push(&mut self, prop: PropRef<'_>) -> Result<(), TPropColumnError> { + self.init_empty_col(&prop); + match (self, prop) { + (PropColumn::Bool(col), PropRef::Bool(v)) => col.push(Some(v)), + (PropColumn::U8(col), PropRef::Num(PropNum::U8(v))) => col.push(Some(v)), + (PropColumn::I64(col), PropRef::Num(PropNum::I64(v))) => col.push(Some(v)), + (PropColumn::U32(col), PropRef::Num(PropNum::U32(v))) => col.push(Some(v)), + (PropColumn::U64(col), PropRef::Num(PropNum::U64(v))) => col.push(Some(v)), + (PropColumn::F32(col), PropRef::Num(PropNum::F32(v))) => col.push(Some(v)), + (PropColumn::F64(col), PropRef::Num(PropNum::F64(v))) => col.push(Some(v)), + (PropColumn::Str(col), PropRef::Str(v)) => col.push_value(v)?, + (PropColumn::U16(col), PropRef::Num(PropNum::U16(v))) => col.push(Some(v)), + (PropColumn::I32(col), PropRef::Num(PropNum::I32(v))) => col.push(Some(v)), + (PropColumn::List(col), PropRef::List(v)) => col.push(Some(v.into_owned())), + (PropColumn::Map(col), PropRef::Map(v)) => { + // FIXME: if we start bulk loading complex structs this won't do + match v { + PropMapRef::Mem(map) => col.push(Some(map.clone())), + PropMapRef::PropCol { map, i } => { + col.push(map.get(i).and_then(|prop| prop.into_map())) + } + PropMapRef::Arrow(arc_map) => { + if let 
Some(prop) = arc_map.into_prop() { + if let Some(map_ref) = prop.as_prop_ref().as_map_ref() { + if let Some(map) = map_ref.as_map() { + col.push(Some(map.clone())); + } + } + } + } + } } - } else if bucket_i > bucket_j { - let guard_j = self.data[bucket_j].data.write(); - let guard_i = self.data[bucket_i].data.write(); - PairEntryMut::Different { - i: offset_i, - j: offset_j, - guard1: guard_i, - guard2: guard_j, + (PropColumn::NDTime(col), PropRef::NDTime(v)) => col.push(Some(v)), + (PropColumn::DTime(col), PropRef::DTime(v)) => col.push(Some(v)), + (PropColumn::Decimal(col), PropRef::Decimal { num, scale }) => { + col.push(Some(BigDecimal::from_bigint(num.into(), scale as i64))) } - } else { - PairEntryMut::Same { - i: offset_i, - j: offset_j, - guard: self.data[bucket_i].data.write(), + (col, prop) => { + Err(IllegalPropType { + expected: col.dtype(), + actual: prop.into_prop().dtype(), + })?; } } + Ok(()) } - pub fn loop_pair_entry_mut(&self, i: VID, j: VID) -> PairEntryMut<'_> { - let i = i.into(); - let j = j.into(); - let (bucket_i, offset_i) = self.resolve(i); - let (bucket_j, offset_j) = self.resolve(j); - loop { - if bucket_i < bucket_j { - let guard_i = self.data[bucket_i].data.try_write(); - let guard_j = self.data[bucket_j].data.try_write(); - let maybe_guards = - guard_i - .zip(guard_j) - .map(|(guard_i, guard_j)| PairEntryMut::Different { - i: offset_i, - j: offset_j, - guard1: guard_i, - guard2: guard_j, - }); - if let Some(guards) = maybe_guards { - return guards; - } - } else if bucket_i > bucket_j { - let guard_j = self.data[bucket_j].data.try_write(); - let guard_i = self.data[bucket_i].data.try_write(); - let maybe_guards = - guard_i - .zip(guard_j) - .map(|(guard_i, guard_j)| PairEntryMut::Different { - i: offset_i, - j: offset_j, - guard1: guard_i, - guard2: guard_j, - }); - if let Some(guards) = maybe_guards { - return guards; - } - } else { - let maybe_guard = self.data[bucket_i].data.try_write(); - if let Some(guard) = maybe_guard { - return PairEntryMut::Same { - i: offset_i, - j: offset_j, - guard, - }; - } + fn init_empty_col(&mut self, prop: &PropRef<'_>) { + if let PropColumn::Empty(len) = self { + match prop { + PropRef::Bool(_) => *self = PropColumn::Bool(LazyVec::with_len(*len)), + PropRef::Num(PropNum::I64(_)) => *self = PropColumn::I64(LazyVec::with_len(*len)), + PropRef::Num(PropNum::U32(_)) => *self = PropColumn::U32(LazyVec::with_len(*len)), + PropRef::Num(PropNum::U64(_)) => *self = PropColumn::U64(LazyVec::with_len(*len)), + PropRef::Num(PropNum::F32(_)) => *self = PropColumn::F32(LazyVec::with_len(*len)), + PropRef::Num(PropNum::F64(_)) => *self = PropColumn::F64(LazyVec::with_len(*len)), + PropRef::Str(_) => *self = PropColumn::Str(StringCol::with_len(*len)), + PropRef::Num(PropNum::U8(_)) => *self = PropColumn::U8(LazyVec::with_len(*len)), + PropRef::Num(PropNum::U16(_)) => *self = PropColumn::U16(LazyVec::with_len(*len)), + PropRef::Num(PropNum::I32(_)) => *self = PropColumn::I32(LazyVec::with_len(*len)), + PropRef::List(_) => *self = PropColumn::List(LazyVec::with_len(*len)), + PropRef::Map(_) => *self = PropColumn::Map(LazyVec::with_len(*len)), + PropRef::NDTime(_) => *self = PropColumn::NDTime(LazyVec::with_len(*len)), + PropRef::DTime(_) => *self = PropColumn::DTime(LazyVec::with_len(*len)), + PropRef::Decimal { .. 
} => *self = PropColumn::Decimal(LazyVec::with_len(*len)), } } } - #[inline] - pub fn len(&self) -> usize { - self.len.load(Ordering::SeqCst) - } - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn next_id(&self) -> VID { - VID(self.len.fetch_add(1, Ordering::Relaxed)) - } -} - -pub struct WriteLockedNodes<'a> { - guards: Vec>, - global_len: &'a AtomicUsize, -} - -pub struct NodeShardWriter<'a, S> { - shard: S, - shard_id: usize, - num_shards: usize, - global_len: &'a AtomicUsize, -} - -impl<'a, S> NodeShardWriter<'a, S> -where - S: DerefMut, -{ - #[inline] - fn resolve(&self, index: VID) -> Option { - let (shard_id, offset) = resolve(index.into(), self.num_shards); - (shard_id == self.shard_id).then_some(offset) - } - - #[inline] - pub fn get_mut(&mut self, index: VID) -> Option<&mut NodeStore> { - self.resolve(index).map(|offset| &mut self.shard[offset]) - } - - #[inline] - pub fn get_mut_entry(&mut self, index: VID) -> Option> { - self.resolve(index).map(|offset| EntryMut { - i: offset, - guard: &mut self.shard, - _pd: PhantomData, - }) - } - - #[inline] - pub fn get(&self, index: VID) -> Option<&NodeStore> { - self.resolve(index).map(|offset| &self.shard[offset]) - } - - #[inline] - pub fn t_prop_log_mut(&mut self) -> &mut TColumns { - &mut self.shard.t_props_log + matches!(self, PropColumn::Empty(_)) } - pub fn set(&mut self, vid: VID, gid: GidRef) -> Option> { - self.resolve(vid).map(|offset| { - if offset >= self.shard.len() { - self.shard.resize_with(offset + 1, NodeStore::default); - self.global_len - .fetch_max(vid.index() + 1, Ordering::Relaxed); - } - self.shard[offset] = NodeStore::resolved(gid.to_owned(), vid); - - EntryMut { - i: offset, - guard: &mut self.shard, - _pd: PhantomData, + pub(crate) fn push_null(&mut self) { + match self { + PropColumn::Bool(col) => col.push(None), + PropColumn::I64(col) => col.push(None), + PropColumn::U32(col) => col.push(None), + PropColumn::U64(col) => col.push(None), + PropColumn::F32(col) => col.push(None), + PropColumn::F64(col) => col.push(None), + PropColumn::Str(col) => col.push_null(), + PropColumn::U8(col) => col.push(None), + PropColumn::U16(col) => col.push(None), + PropColumn::I32(col) => col.push(None), + PropColumn::List(col) => col.push(None), + PropColumn::Map(col) => col.push(None), + PropColumn::NDTime(col) => col.push(None), + PropColumn::DTime(col) => col.push(None), + PropColumn::Decimal(col) => col.push(None), + PropColumn::Empty(count) => { + *count += 1; } - }) - } - - pub fn shard_id(&self) -> usize { - self.shard_id - } - - fn resize(&mut self, new_global_len: usize) { - let mut new_len = new_global_len / self.num_shards; - if self.shard_id < new_global_len % self.num_shards { - new_len += 1; } - if new_len > self.shard.len() { - self.shard.resize_with(new_len, Default::default); - self.global_len.fetch_max(new_global_len, Ordering::Relaxed); - } - } -} - -impl<'a> WriteLockedNodes<'a> { - pub fn par_iter_mut( - &mut self, - ) -> impl IndexedParallelIterator> + '_ { - let num_shards = self.guards.len(); - let global_len = self.global_len; - let shards: Vec<&mut NodeSlot> = self - .guards - .iter_mut() - .map(|guard| guard.deref_mut()) - .collect(); - shards - .into_par_iter() - .enumerate() - .map(move |(shard_id, shard)| NodeShardWriter { - shard, - shard_id, - num_shards, - global_len, - }) - } - - pub fn into_par_iter_mut( - self, - ) -> impl IndexedParallelIterator>> + 'a - { - let num_shards = self.guards.len(); - let global_len = self.global_len; - self.guards - .into_par_iter() - .enumerate() - 
.map(move |(shard_id, shard)| NodeShardWriter { - shard, - shard_id, - num_shards, - global_len, - }) - } - - pub fn resize(&mut self, new_len: usize) { - self.par_iter_mut() - .for_each(|mut shard| shard.resize(new_len)) } - pub fn num_shards(&self) -> usize { - self.guards.len() - } -} - -#[derive(Debug)] -pub struct NodeEntry<'a> { - offset: usize, - guard: RwLockReadGuard<'a, NodeSlot>, -} - -impl NodeEntry<'_> { - #[inline] - pub fn as_ref(&self) -> NodePtr<'_> { - NodePtr::new(&self.guard[self.offset], &self.guard.t_props_log) - } -} - -pub enum PairEntryMut<'a> { - Same { - i: usize, - j: usize, - guard: parking_lot::RwLockWriteGuard<'a, NodeSlot>, - }, - Different { - i: usize, - j: usize, - guard1: parking_lot::RwLockWriteGuard<'a, NodeSlot>, - guard2: parking_lot::RwLockWriteGuard<'a, NodeSlot>, - }, -} - -impl<'a> PairEntryMut<'a> { - pub(crate) fn get_i(&self) -> &NodeStore { - match self { - PairEntryMut::Same { i, guard, .. } => &guard[*i], - PairEntryMut::Different { i, guard1, .. } => &guard1[*i], - } - } - pub(crate) fn get_mut_i(&mut self) -> &mut NodeStore { + pub fn get(&self, index: usize) -> Option { match self { - PairEntryMut::Same { i, guard, .. } => &mut guard[*i], - PairEntryMut::Different { i, guard1, .. } => &mut guard1[*i], - } - } - - pub(crate) fn get_j(&self) -> &NodeStore { + PropColumn::Bool(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::I64(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::U32(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::U64(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::F32(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::F64(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::Str(col) => col.get_opt(index).map(|prop| prop.into()), + PropColumn::U8(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::U16(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::I32(col) => col.get_opt(index).map(|prop| (*prop).into()), + PropColumn::List(col) => col.get_opt(index).map(|prop| Prop::List(prop.clone())), + PropColumn::Map(col) => col.get_opt(index).map(|prop| Prop::Map(prop.clone())), + PropColumn::NDTime(col) => col.get_opt(index).map(|prop| Prop::NDTime(*prop)), + PropColumn::DTime(col) => col.get_opt(index).map(|prop| Prop::DTime(*prop)), + PropColumn::Decimal(col) => col.get_opt(index).map(|prop| Prop::Decimal(prop.clone())), + PropColumn::Empty(_) => None, + } + } + + pub fn get_ref(&self, index: usize) -> Option> { match self { - PairEntryMut::Same { j, guard, .. } => &guard[*j], - PairEntryMut::Different { j, guard2, .. 
} => &guard2[*j], + PropColumn::Bool(col) => col.get_opt(index).map(|prop| PropRef::Bool(*prop)), + PropColumn::I64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::F32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::F64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::Str(col) => col.get_opt(index).map(|prop| PropRef::Str(prop.as_ref())), + PropColumn::U8(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U16(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::I32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::List(col) => col + .get_opt(index) + .map(|prop| PropRef::List(Cow::Borrowed(prop))), + PropColumn::Map(col) => col.get_opt(index).map(PropRef::from), + PropColumn::NDTime(col) => col.get_opt(index).copied().map(PropRef::from), + PropColumn::DTime(col) => col.get_opt(index).copied().map(PropRef::from), + PropColumn::Decimal(col) => col.get_opt(index).map(PropRef::from), + PropColumn::Empty(_) => None, } } - pub(crate) fn get_mut_j(&mut self) -> &mut NodeStore { + pub(crate) fn len(&self) -> usize { match self { - PairEntryMut::Same { j, guard, .. } => &mut guard[*j], - PairEntryMut::Different { j, guard2, .. } => &mut guard2[*j], - } - } -} - -pub struct EntryMut<'a, NS: 'a> { - i: usize, - guard: NS, - _pd: PhantomData<&'a ()>, -} - -impl<'a, NS> EntryMut<'a, NS> { - pub fn to_mut(&mut self) -> EntryMut<'a, &mut NS> { - EntryMut { - i: self.i, - guard: &mut self.guard, - _pd: self._pd, + PropColumn::Bool(col) => col.len(), + PropColumn::I64(col) => col.len(), + PropColumn::U32(col) => col.len(), + PropColumn::U64(col) => col.len(), + PropColumn::F32(col) => col.len(), + PropColumn::F64(col) => col.len(), + PropColumn::Str(col) => col.len(), + PropColumn::U8(col) => col.len(), + PropColumn::U16(col) => col.len(), + PropColumn::I32(col) => col.len(), + PropColumn::List(col) => col.len(), + PropColumn::Map(col) => col.len(), + PropColumn::NDTime(col) => col.len(), + PropColumn::DTime(col) => col.len(), + PropColumn::Decimal(col) => col.len(), + PropColumn::Empty(count) => *count, } } } -impl<'a, NS: DerefMut> AsMut for EntryMut<'a, NS> { - fn as_mut(&mut self) -> &mut NodeStore { - let slots = self.guard.deref_mut(); - &mut slots[self.i] - } -} - -impl<'a, NS: DerefMut + 'a> EntryMut<'a, &'a mut NS> { - pub fn node_store_mut(&mut self) -> &mut NodeStore { - &mut self.guard[self.i] - } - - pub fn t_props_log_mut(&mut self) -> &mut TColumns { - &mut self.guard.t_props_log - } -} - #[cfg(test)] mod test { - use super::{NodeStorage, TColumns}; - use crate::entities::nodes::node_store::NodeStore; - use proptest::{arbitrary::any, prop_assert_eq, proptest}; - use raphtory_api::core::entities::{properties::prop::Prop, GID, VID}; - use rayon::prelude::*; - use std::borrow::Cow; + use super::TColumns; + use raphtory_api::core::entities::properties::prop::Prop; #[test] fn tcolumns_append_1() { @@ -1117,91 +571,4 @@ mod test { ] ); } - - #[test] - fn add_5_values_to_storage() { - let storage = NodeStorage::new(2); - - for i in 0..5 { - storage.push(NodeStore::empty(i.into())).init(); - } - - assert_eq!(storage.len(), 5); - - for i in 0..5 { - let entry = storage.entry(VID(i)); - assert_eq!(entry.as_ref().node().vid, VID(i)); - } - - let items = storage.read_lock(); - - let actual = items - 
.iter() - .map(|s| s.node().vid.index()) - .collect::>(); - - assert_eq!(actual, vec![0, 2, 4, 1, 3]); - } - - #[test] - fn test_index_correctness() { - let storage = NodeStorage::new(2); - - for i in 0..5 { - storage.push(NodeStore::empty(i.into())).init(); - } - let locked = storage.read_lock(); - let actual: Vec<_> = (0..5) - .map(|i| (i, locked.get(VID(i)).global_id.to_str())) - .collect(); - - assert_eq!( - actual, - vec![ - (0usize, Cow::Borrowed("0")), - (1, "1".into()), - (2, "2".into()), - (3, "3".into()), - (4, "4".into()) - ] - ); - } - - #[test] - fn test_entry() { - let storage = NodeStorage::new(2); - - for i in 0..5 { - storage.push(NodeStore::empty(i.into())).init(); - } - - for i in 0..5 { - let entry = storage.entry(VID(i)); - assert_eq!(*entry.as_ref().node().global_id.to_str(), i.to_string()); - } - } - - #[test] - fn concurrent_push() { - proptest!(|(v in any::>())| { - let storage = NodeStorage::new(16); - let mut expected = v - .into_par_iter() - .map(|v| { - storage.push(NodeStore::empty(GID::U64(v))).init(); - v - }) - .collect::>(); - - let locked = storage.read_lock(); - let mut actual: Vec<_> = locked - .iter() - .map(|n| n.node().global_id.as_u64().unwrap()) - .collect(); - - actual.sort(); - expected.sort(); - prop_assert_eq!(actual, expected) - }) - } } diff --git a/raphtory-core/src/storage/node_entry.rs b/raphtory-core/src/storage/node_entry.rs deleted file mode 100644 index ee62ac8e74..0000000000 --- a/raphtory-core/src/storage/node_entry.rs +++ /dev/null @@ -1,140 +0,0 @@ -use super::TColumns; -use crate::entities::{nodes::node_store::NodeStore, properties::tprop::TPropCell}; -use itertools::Itertools; -use raphtory_api::core::{ - entities::{ - edges::edge_ref::EdgeRef, - properties::{prop::Prop, tprop::TPropOps}, - LayerIds, - }, - storage::timeindex::EventTime, - Direction, -}; -use std::{ - fmt::{Debug, Formatter}, - ops::Range, -}; - -#[derive(Copy, Clone)] -pub struct MemRow<'a> { - cols: &'a TColumns, - row: Option, -} - -impl<'a> Debug for MemRow<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_list().entries(*self).finish() - } -} - -impl<'a> MemRow<'a> { - pub fn new(cols: &'a TColumns, row: Option) -> Self { - Self { cols, row } - } -} - -impl<'a> IntoIterator for MemRow<'a> { - type Item = (usize, Option); - - type IntoIter = Box + 'a>; - - fn into_iter(self) -> Self::IntoIter { - Box::new( - self.cols - .iter() - .enumerate() - .map(move |(i, col)| (i, self.row.and_then(|row| col.get(row)))), - ) - } -} - -#[derive(Copy, Clone)] -pub struct NodePtr<'a> { - pub node: &'a NodeStore, - t_props_log: &'a TColumns, -} - -impl<'a> NodePtr<'a> { - pub fn edges_iter( - self, - layers: &LayerIds, - dir: Direction, - ) -> impl Iterator + 'a { - self.node.edge_tuples(layers, dir) - } -} - -impl<'a> Debug for NodePtr<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Node") - .field("gid", self.node.global_id()) - .field("vid", &self.node.vid) - .field("node_type", &self.node.node_type) - .field("layers", &self.node.layers) - .field( - "metadata", - &self - .node - .metadata_ids() - .filter_map(|i| Some((i, self.node.metadata(i)?))) - .collect_vec(), - ) - .field("temporal_properties", &self.into_rows().collect_vec()) - .field("additions", self.node.timestamps()) - .finish() - } -} - -impl<'a> NodePtr<'a> { - pub fn new(node: &'a NodeStore, t_props_log: &'a TColumns) -> Self { - Self { node, t_props_log } - } - - pub fn node(self) -> &'a NodeStore { - self.node - } - - pub fn t_prop(self, 
prop_id: usize) -> TPropCell<'a> { - TPropCell::new( - &self.node.timestamps().props_ts, - self.t_props_log.get(prop_id), - ) - } - - pub fn temporal_prop_ids(self) -> impl Iterator + 'a { - self.t_props_log - .t_props_log - .iter() - .enumerate() - .filter_map(|(id, col)| (!col.is_empty()).then_some(id)) - } - - pub fn into_rows(self) -> impl Iterator)> { - self.node - .timestamps() - .props_ts - .iter() - .map(move |(t, &row)| (*t, MemRow::new(self.t_props_log, row))) - } - - pub fn last_before_row(self, t: EventTime) -> Vec<(usize, Prop)> { - self.t_props_log - .iter() - .enumerate() - .filter_map(|(prop_id, _)| { - let t_prop = self.t_prop(prop_id); - t_prop.last_before(t).map(|(_, v)| (prop_id, v)) - }) - .collect() - } - - pub fn into_rows_window( - self, - w: Range, - ) -> impl Iterator)> + Send + Sync { - let tcell = &self.node.timestamps().props_ts; - tcell - .iter_window(w) - .map(move |(t, row)| (*t, MemRow::new(self.t_props_log, *row))) - } -} diff --git a/raphtory-core/src/storage/raw_edges.rs b/raphtory-core/src/storage/raw_edges.rs deleted file mode 100644 index b7dd07373c..0000000000 --- a/raphtory-core/src/storage/raw_edges.rs +++ /dev/null @@ -1,453 +0,0 @@ -use super::{resolve, timeindex::TimeIndex}; -use crate::{ - entities::edges::edge_store::{EdgeLayer, EdgeStore, MemEdge}, - loop_lock_write, -}; -use itertools::Itertools; -use lock_api::ArcRwLockReadGuard; -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -use raphtory_api::core::{entities::EID, storage::timeindex::EventTime}; -use rayon::prelude::*; -use serde::{Deserialize, Serialize}; -use std::{ - fmt::{Debug, Formatter}, - ops::{Deref, DerefMut}, - sync::{ - atomic::{self, AtomicUsize, Ordering}, - Arc, - }, -}; - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct EdgeShard { - edge_ids: Vec, - props: Vec>, - additions: Vec>>, - deletions: Vec>>, -} - -#[must_use] -pub struct UninitialisedEdge<'a> { - guard: RwLockWriteGuard<'a, EdgeShard>, - offset: usize, - value: EdgeStore, -} - -impl<'a> UninitialisedEdge<'a> { - pub fn init(mut self) -> EdgeWGuard<'a> { - self.guard.insert(self.offset, self.value); - EdgeWGuard { - guard: self.guard, - i: self.offset, - } - } - - pub fn value(&self) -> &EdgeStore { - &self.value - } - - pub fn value_mut(&mut self) -> &mut EdgeStore { - &mut self.value - } -} - -impl EdgeShard { - pub fn insert(&mut self, index: usize, value: EdgeStore) { - if index >= self.edge_ids.len() { - self.edge_ids.resize_with(index + 1, Default::default); - } - self.edge_ids[index] = value; - } - - pub fn edge_store(&self, index: usize) -> &EdgeStore { - &self.edge_ids[index] - } - - pub fn internal_num_layers(&self) -> usize { - self.additions.len().max(self.deletions.len()) - } - - pub fn additions(&self, index: usize, layer_id: usize) -> Option<&TimeIndex> { - self.additions.get(layer_id).and_then(|add| add.get(index)) - } - - pub fn deletions(&self, index: usize, layer_id: usize) -> Option<&TimeIndex> { - self.deletions.get(layer_id).and_then(|del| del.get(index)) - } - - pub fn props(&self, index: usize, layer_id: usize) -> Option<&EdgeLayer> { - self.props.get(layer_id).and_then(|props| props.get(index)) - } - - pub fn props_iter(&self, index: usize) -> impl Iterator { - self.props - .iter() - .enumerate() - .filter_map(move |(id, layer)| layer.get(index).map(|l| (id, l))) - } -} - -#[derive(Clone, Serialize, Deserialize)] -pub struct EdgesStorage { - shards: Arc<[Arc>]>, - len: Arc, -} - -impl Debug for EdgesStorage { - fn fmt(&self, f: &mut Formatter<'_>) -> 
std::fmt::Result { - f.debug_struct("EdgesStorage") - .field("len", &self.len()) - .field("data", &self.read_lock().iter().collect_vec()) - .finish() - } -} - -impl PartialEq for EdgesStorage { - fn eq(&self, other: &Self) -> bool { - self.shards.len() == other.shards.len() - && self - .shards - .iter() - .zip(other.shards.iter()) - .all(|(a, b)| a.read_recursive().eq(&b.read_recursive())) - } -} - -impl Default for EdgesStorage { - fn default() -> Self { - Self::new(rayon::current_num_threads()) - } -} - -impl EdgesStorage { - pub fn new(num_shards: usize) -> Self { - let shards = (0..num_shards).map(|_| { - Arc::new(RwLock::new(EdgeShard { - edge_ids: vec![], - props: Vec::with_capacity(0), - additions: Vec::with_capacity(1), - deletions: Vec::with_capacity(0), - })) - }); - EdgesStorage { - shards: shards.collect(), - len: Arc::new(AtomicUsize::new(0)), - } - } - - #[inline] - pub fn len(&self) -> usize { - self.len.load(atomic::Ordering::SeqCst) - } - - pub fn next_id(&self) -> EID { - EID(self.len.fetch_add(1, Ordering::Relaxed)) - } - - pub fn read_lock(&self) -> LockedEdges { - LockedEdges { - shards: self - .shards - .iter() - .map(|shard| Arc::new(shard.read_arc_recursive())) - .collect(), - len: self.len(), - } - } - - pub fn write_lock(&self) -> WriteLockedEdges<'_> { - WriteLockedEdges { - shards: self.shards.iter().map(|shard| shard.write()).collect(), - global_len: &self.len, - } - } - - #[inline] - fn resolve(&self, index: usize) -> (usize, usize) { - resolve(index, self.shards.len()) - } - - pub(crate) fn push(&self, mut value: EdgeStore) -> UninitialisedEdge<'_> { - let index = self.len.fetch_add(1, atomic::Ordering::Relaxed); - value.eid = EID(index); - let (bucket, offset) = self.resolve(index); - let guard = loop_lock_write(&self.shards[bucket]); - UninitialisedEdge { - guard, - offset, - value, - } - } - - pub fn get_edge_mut(&self, eid: EID) -> EdgeWGuard<'_> { - let (bucket, offset) = self.resolve(eid.into()); - EdgeWGuard { - guard: loop_lock_write(&self.shards[bucket]), - i: offset, - } - } - - pub fn get_edge(&self, eid: EID) -> EdgeRGuard<'_> { - let (bucket, offset) = self.resolve(eid.into()); - EdgeRGuard { - guard: self.shards[bucket].read_recursive(), - offset, - } - } - - pub fn try_get_edge(&self, eid: EID) -> Option> { - let (bucket, offset) = self.resolve(eid.into()); - let guard = self.shards.get(bucket)?.read(); - if guard.edge_ids.get(offset)?.initialised() { - Some(EdgeRGuard { guard, offset }) - } else { - None - } - } -} - -pub struct EdgeWGuard<'a> { - guard: RwLockWriteGuard<'a, EdgeShard>, - i: usize, -} - -impl<'a> EdgeWGuard<'a> { - pub fn as_mut(&mut self) -> MutEdge<'_> { - MutEdge { - guard: self.guard.deref_mut(), - i: self.i, - } - } - - pub fn as_ref(&self) -> MemEdge<'_> { - MemEdge::new(&self.guard, self.i) - } - - pub fn eid(&self) -> EID { - self.as_ref().eid() - } -} - -pub struct MutEdge<'a> { - guard: &'a mut EdgeShard, - i: usize, -} - -impl<'a> MutEdge<'a> { - pub fn as_ref(&self) -> MemEdge<'_> { - MemEdge::new(self.guard, self.i) - } - pub fn eid(&self) -> EID { - self.as_ref().eid() - } - - pub fn edge_store_mut(&mut self) -> &mut EdgeStore { - &mut self.guard.edge_ids[self.i] - } - - pub fn deletions_mut(&mut self, layer_id: usize) -> &mut TimeIndex { - if layer_id >= self.guard.deletions.len() { - self.guard - .deletions - .resize_with(layer_id + 1, Default::default); - } - if self.i >= self.guard.deletions[layer_id].len() { - self.guard.deletions[layer_id].resize_with(self.i + 1, Default::default); - } - &mut 
self.guard.deletions[layer_id][self.i] - } - - fn has_layer(&self, layer_id: usize) -> bool { - if let Some(additions) = self.guard.additions.get(layer_id) { - if let Some(additions) = additions.get(self.i) { - return !additions.is_empty(); - } - } - if let Some(deletions) = self.guard.deletions.get(layer_id) { - if let Some(deletions) = deletions.get(self.i) { - return !deletions.is_empty(); - } - } - false - } - pub fn additions_mut(&mut self, layer_id: usize) -> &mut TimeIndex { - if layer_id >= self.guard.additions.len() { - self.guard - .additions - .resize_with(layer_id + 1, Default::default); - } - if self.i >= self.guard.additions[layer_id].len() { - self.guard.additions[layer_id].resize_with(self.i + 1, Default::default); - } - &mut self.guard.additions[layer_id][self.i] - } - - pub fn layer_mut(&mut self, layer_id: usize) -> &mut EdgeLayer { - if layer_id >= self.guard.props.len() { - self.guard.props.resize_with(layer_id + 1, Default::default); - } - if self.i >= self.guard.props[layer_id].len() { - self.guard.props[layer_id].resize_with(self.i + 1, Default::default); - } - - &mut self.guard.props[layer_id][self.i] - } - - /// Get a mutable reference to the layer only if it already exists but don't create a new one - pub fn get_layer_mut(&mut self, layer_id: usize) -> Option<&mut EdgeLayer> { - self.has_layer(layer_id).then(|| self.layer_mut(layer_id)) - } -} - -#[derive(Debug)] -pub struct EdgeRGuard<'a> { - guard: RwLockReadGuard<'a, EdgeShard>, - offset: usize, -} - -impl<'a> EdgeRGuard<'a> { - pub fn as_mem_edge(&self) -> MemEdge<'_> { - MemEdge::new(&self.guard, self.offset) - } - - pub fn layer_iter( - &self, - ) -> impl Iterator + '_)> + '_ { - self.guard.props_iter(self.offset) - } -} - -#[derive(Debug)] -pub struct LockedEdges { - shards: Arc<[Arc>]>, - len: usize, -} - -impl LockedEdges { - pub fn get_mem(&self, eid: EID) -> MemEdge<'_> { - let (bucket, offset) = resolve(eid.into(), self.shards.len()); - MemEdge::new(&self.shards[bucket], offset) - } - - pub fn try_get_mem(&self, eid: EID) -> Option> { - let (bucket, offset) = resolve(eid.into(), self.shards.len()); - let guard = self.shards.get(bucket)?; - if guard.edge_ids.get(offset)?.initialised() { - Some(MemEdge::new(guard, offset)) - } else { - None - } - } - - pub fn len(&self) -> usize { - self.len - } - - pub fn iter(&self) -> impl Iterator> + '_ { - self.shards.iter().flat_map(|shard| { - shard - .edge_ids - .iter() - .enumerate() - .filter(|(_, e)| e.initialised()) - .map(move |(offset, _)| MemEdge::new(shard, offset)) - }) - } - - pub fn par_iter(&self) -> impl ParallelIterator> + '_ { - self.shards.par_iter().flat_map(|shard| { - shard - .edge_ids - .par_iter() - .enumerate() - .filter(|(_, e)| e.initialised()) - .map(move |(offset, _)| MemEdge::new(shard, offset)) - }) - } -} - -pub struct EdgeShardWriter<'a, S> { - shard: S, - shard_id: usize, - num_shards: usize, - global_len: &'a AtomicUsize, -} - -impl<'a, S> EdgeShardWriter<'a, S> -where - S: DerefMut, -{ - /// Map an edge id to local offset if it is in the shard - fn resolve(&self, eid: EID) -> Option { - let EID(eid) = eid; - let (bucket, offset) = resolve(eid, self.num_shards); - (bucket == self.shard_id).then_some(offset) - } - - pub fn get_mut(&mut self, eid: EID) -> Option> { - let offset = self.resolve(eid)?; - if self.shard.edge_ids.len() <= offset { - self.global_len.fetch_max(eid.0 + 1, Ordering::Relaxed); - self.shard - .edge_ids - .resize_with(offset + 1, EdgeStore::default) - } - Some(MutEdge { - guard: self.shard.deref_mut(), - i: 
offset, - }) - } - - pub fn shard_id(&self) -> usize { - self.shard_id - } -} - -pub struct WriteLockedEdges<'a> { - shards: Vec>, - global_len: &'a AtomicUsize, -} - -impl<'a> WriteLockedEdges<'a> { - pub fn par_iter_mut( - &mut self, - ) -> impl IndexedParallelIterator> + '_ { - let num_shards = self.shards.len(); - let shards: Vec<_> = self - .shards - .iter_mut() - .map(|shard| shard.deref_mut()) - .collect(); - let global_len = self.global_len; - shards - .into_par_iter() - .enumerate() - .map(move |(shard_id, shard)| EdgeShardWriter { - shard, - shard_id, - num_shards, - global_len, - }) - } - - pub fn into_par_iter_mut( - self, - ) -> impl IndexedParallelIterator>> + 'a - { - let num_shards = self.shards.len(); - let global_len = self.global_len; - self.shards - .into_par_iter() - .enumerate() - .map(move |(shard_id, shard)| EdgeShardWriter { - shard, - shard_id, - num_shards, - global_len, - }) - } - - pub fn num_shards(&self) -> usize { - self.shards.len() - } -} diff --git a/raphtory-core/src/storage/string_col.rs b/raphtory-core/src/storage/string_col.rs new file mode 100644 index 0000000000..7db77168d3 --- /dev/null +++ b/raphtory-core/src/storage/string_col.rs @@ -0,0 +1,398 @@ +use crate::storage::lazy_vec::IllegalSet; +use arrow_array::{types::StringViewType, GenericByteViewArray}; +use arrow_buffer::{bit_util::set_bit, Buffer, NullBufferBuilder}; +use arrow_data::{ByteView, MAX_INLINE_VIEW_LEN}; +use arrow_schema::ArrowError; + +#[derive(Copy, Clone, Debug)] +struct BlockSizeGrowthStrategy { + current_size: u32, +} +const STARTING_BLOCK_SIZE: BlockSizeGrowthStrategy = BlockSizeGrowthStrategy { + current_size: 8 * 1024, +}; // 8KiB +const MAX_BLOCK_SIZE: u32 = 2 * 1024 * 1024; // 2MiB + +impl BlockSizeGrowthStrategy { + fn next_size(&mut self) -> u32 { + if self.current_size < MAX_BLOCK_SIZE { + // we have fixed start/end block sizes, so we can't overflow + self.current_size = self.current_size.saturating_mul(2); + self.current_size + } else { + MAX_BLOCK_SIZE + } + } +} + +#[inline] +fn inline_view(bytes: &[u8]) -> Option { + let len = bytes.len(); + if len <= MAX_INLINE_VIEW_LEN as usize { + let mut view_buffer = [0; 16]; + view_buffer[0..4].copy_from_slice(&(len as u32).to_le_bytes()); + view_buffer[4..4 + len].copy_from_slice(bytes); + Some(u128::from_le_bytes(view_buffer)) + } else { + None + } +} + +#[derive(Debug)] +pub struct StringColBuilder { + views_buffer: Vec, + null_buffer_builder: NullBufferBuilder, + completed: Vec, + in_progress: Vec, + block_size: BlockSizeGrowthStrategy, +} + +impl StringColBuilder { + pub fn len(&self) -> usize { + self.views_buffer.len() + } + + pub fn with_capacity(capacity: usize) -> Self { + Self { + views_buffer: Vec::with_capacity(capacity), + null_buffer_builder: NullBufferBuilder::new(capacity), + completed: vec![], + in_progress: vec![], + block_size: STARTING_BLOCK_SIZE, + } + } + + pub fn get_value(&self, index: usize) -> Option<&str> { + let view = self.views_buffer.get(index)?; + if self.null_buffer_builder.is_valid(index) { + let len = *view as u32; + let bytes = if len <= MAX_INLINE_VIEW_LEN { + // # Safety + // The view is valid from the builder + unsafe { GenericByteViewArray::::inline_value(view, len as usize) } + } else { + let view = ByteView::from(*view); + if view.buffer_index < self.completed.len() as u32 { + let block = &self.completed[view.buffer_index as usize]; + &block[view.offset as usize..view.offset as usize + view.length as usize] + } else { + &self.in_progress + [view.offset as usize..view.offset as usize 
+ view.length as usize] + } + }; + // # Safety + // Strings in the builder are always valid + Some(unsafe { str::from_utf8_unchecked(bytes) }) + } else { + None + } + } + + /// Append a null value into the builder + #[inline] + pub fn append_null(&mut self) { + self.null_buffer_builder.append_null(); + self.views_buffer.push(0); + } + + #[inline] + fn append_value_inner(&mut self, bytes: &[u8]) -> Result { + let required_cap = self.in_progress.len() + bytes.len(); + if self.in_progress.capacity() < required_cap { + self.flush_in_progress(); + let to_reserve = bytes.len().max(self.block_size.next_size() as usize); + self.in_progress.reserve(to_reserve); + }; + + let offset = self.in_progress.len() as u32; + self.in_progress.extend_from_slice(bytes); + + let buffer_index: u32 = self.completed.len().try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "Buffer count {} exceeds u32::MAX", + self.completed.len() + )) + })?; + + let length: u32 = bytes.len().try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "String length {} exceeds u32::MAX", + bytes.len() + )) + })?; + + let view = ByteView { + length, + // This won't panic as we checked the length of prefix earlier. + prefix: u32::from_le_bytes(bytes[0..4].try_into().unwrap()), + buffer_index, + offset, + }; + Ok(view) + } + + #[inline] + fn update_value_inner(&mut self, index: usize, bytes: &[u8]) -> Result<(), ArrowError> { + if let Some(inline_view) = inline_view(bytes) { + // inline, only need to update the view + self.views_buffer[index] = inline_view; + return Ok(()); + } + let new_len: u32 = bytes.len().try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "String length {} exceeds u32::MAX", + bytes.len() + )) + })?; + let old_view = self.views_buffer[index]; + let old_len = old_view as u32; + if old_len >= new_len { + // can maybe reuse old allocation + let mut view = ByteView::from(old_view); + if view.buffer_index >= self.completed.len() as u32 { + self.in_progress[view.offset as usize..view.offset as usize + bytes.len()] + .copy_from_slice(bytes); + view.length = new_len; + view.prefix = u32::from_le_bytes(bytes[0..4].try_into().unwrap()); + self.views_buffer[index] = view.into(); + return Ok(()); + } + } + let view = self.append_value_inner(bytes)?; + self.views_buffer[index] = view.into(); + Ok(()) + } + + #[inline] + pub fn try_append_value(&mut self, value: &str) -> Result<(), ArrowError> { + let v: &[u8] = value.as_ref(); + + if let Some(view) = inline_view(v) { + self.views_buffer.push(view); + self.null_buffer_builder.append_non_null(); + return Ok(()); + } + + let view = self.append_value_inner(v)?; + self.views_buffer.push(view.into()); + self.null_buffer_builder.append_non_null(); + + Ok(()) + } + + #[inline] + pub fn append_value(&mut self, value: &str) { + self.try_append_value(value).unwrap(); + } + + pub fn upsert_value(&mut self, index: usize, value: &str) -> Result<(), ArrowError> { + if index >= self.len() { + for _ in self.len()..index { + self.append_null(); + } + self.try_append_value(value) + } else { + let bytes = value.as_bytes(); + if let Some(inline_view) = inline_view(bytes) { + // inline, only need to update the view + self.views_buffer[index] = inline_view; + } else { + self.update_value_inner(index, bytes)?; + } + // set new entry as valid + if !self.null_buffer_builder.is_valid(index) { + let nulls = self + .null_buffer_builder + .as_slice_mut() + .expect("NullBufferBuilder with nulls should be materialized"); + set_bit(nulls, index); + } + Ok(()) 
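
`upsert_value` and `update_value_inner` above lean on arrow's StringView ("German string") layout: each value is a 16-byte `u128` view whose low 4 bytes hold the length; strings of at most `MAX_INLINE_VIEW_LEN` (12) bytes sit directly in the remaining 12 bytes, while longer strings keep a 4-byte prefix plus a `(buffer_index, offset)` pointer into the completed data blocks. A standalone sketch of the inline half of that layout, mirroring the `inline_view` helper above; the constant name is a local stand-in for arrow's.

const MAX_INLINE: usize = 12; // assumption: matches arrow's MAX_INLINE_VIEW_LEN

fn inline_view(bytes: &[u8]) -> Option<u128> {
    if bytes.len() <= MAX_INLINE {
        let mut buf = [0u8; 16];
        // low 4 bytes: length, little-endian
        buf[0..4].copy_from_slice(&(bytes.len() as u32).to_le_bytes());
        // payload follows immediately after the length
        buf[4..4 + bytes.len()].copy_from_slice(bytes);
        Some(u128::from_le_bytes(buf))
    } else {
        None // long strings need a data block and a full ByteView
    }
}

fn main() {
    let view = inline_view(b"hello").unwrap();
    assert_eq!(view as u32, 5); // truncation reads the length field
    assert_eq!(&view.to_le_bytes()[4..9], b"hello");
    assert!(inline_view(b"more than twelve bytes here").is_none());
}
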
+ } + } + + /// Flushes the in progress block if any + #[inline] + fn flush_in_progress(&mut self) { + if !self.in_progress.is_empty() { + let f = Buffer::from_vec(std::mem::take(&mut self.in_progress)); + self.push_completed(f) + } + } + + /// Append a block to `self.completed`, checking for overflow + #[inline] + fn push_completed(&mut self, block: Buffer) { + assert!(block.len() < u32::MAX as usize, "Block too large"); + assert!(self.completed.len() < u32::MAX as usize, "Too many blocks"); + self.completed.push(block); + } +} + +#[derive(Debug)] +pub enum StringCol { + Empty { + len: usize, + }, + One { + len: usize, + index: usize, + value: String, + }, + Many { + values: StringColBuilder, + }, +} + +impl Default for StringCol { + fn default() -> Self { + StringCol::Empty { len: 0 } + } +} +impl StringCol { + pub fn with_len(len: usize) -> Self { + StringCol::Empty { len } + } + + pub fn len(&self) -> usize { + match self { + StringCol::Empty { len } | StringCol::One { len, .. } => *len, + StringCol::Many { values } => values.len(), + } + } + + pub fn get_opt(&self, i: usize) -> Option<&str> { + match self { + StringCol::Empty { .. } => None, + StringCol::One { index, value, .. } => { + if i == *index { + Some(value) + } else { + None + } + } + StringCol::Many { values } => values.get_value(i), + } + } + + pub fn upsert(&mut self, new_index: usize, new_value: &str) -> Result<(), ArrowError> { + match self { + StringCol::Empty { len } => { + let len = (*len).max(new_index + 1); + *self = StringCol::One { + len, + index: new_index, + value: new_value.to_string(), + }; + } + StringCol::One { len, index, value } => { + if *index == new_index { + *value = new_value.to_string(); + } else { + let len = (*len).max(new_index + 1); + let (first_index, first_value, second_index, second_value) = + if *index < new_index { + (*index, value.as_str(), new_index, new_value) + } else { + (new_index, new_value, *index, value.as_str()) + }; + let mut values = StringColBuilder::with_capacity(len); + for _ in 0..first_index { + values.append_null(); + } + values.append_value(first_value); + for _ in first_index + 1..second_index { + values.append_null(); + } + values.append_value(second_value); + for _ in second_index + 1..len { + values.append_null(); + } + *self = StringCol::Many { values }; + } + } + StringCol::Many { values } => values.upsert_value(new_index, new_value)?, + } + Ok(()) + } + + pub fn check(&self, new_index: usize, new_value: &str) -> Result<(), IllegalSet> { + if let Some(old_value) = self.get_opt(new_index) { + if old_value != new_value { + return Err(IllegalSet::new( + new_index, + old_value.to_owned(), + new_value.to_owned(), + )); + } + } + Ok(()) + } + + pub fn push_value(&mut self, new_value: &str) -> Result<(), ArrowError> { + match self { + StringCol::Empty { len } => { + let index = *len; + let len = index + 1; + let value = new_value.to_owned(); + *self = StringCol::One { len, index, value } + } + StringCol::One { index, value, len } => { + let mut values = StringColBuilder::with_capacity(*len + 1); + for _ in 0..*index { + values.append_null(); + } + values.try_append_value(value)?; + for _ in *index + 1..*len { + values.append_null(); + } + values.try_append_value(new_value)?; + *self = StringCol::Many { values }; + } + StringCol::Many { values } => values.try_append_value(new_value)?, + } + Ok(()) + } + + pub fn push_null(&mut self) { + match self { + StringCol::Empty { len } => *len += 1, + StringCol::One { len, .. 
} => *len += 1, + StringCol::Many { values } => values.append_null(), + } + } +} + +#[cfg(test)] +mod tests { + use crate::storage::string_col::StringCol; + use proptest::{arbitrary::any, proptest}; + use raphtory_api::core::storage::arc_str::OptionAsStr; + + #[test] + fn test_upsert_and_push() { + proptest!(|(mut old_values in proptest::collection::vec(any::>(), 0..100usize), new_value in any::(), new_index in 0..100usize)|{ + let mut col = StringCol::default(); + for v in &old_values { + match v { + None => {col.push_null()} + Some(v) => {col.push_value(v).unwrap()} + } + } + assert_eq!(col.len(), old_values.len()); + for (i, v) in old_values.iter().enumerate() { + assert_eq!(col.get_opt(i), v.as_str()); + } + + // upsert + col.upsert(new_index, &new_value).unwrap(); + + old_values.resize(old_values.len().max(new_index+1), None); + old_values[new_index] = Some(new_value); + assert_eq!(col.len(), old_values.len()); + for (i, v) in old_values.iter().enumerate() { + assert_eq!(col.get_opt(i), v.as_str()); + } + }) + } +} diff --git a/raphtory-core/src/storage/timeindex.rs b/raphtory-core/src/storage/timeindex.rs index d14584ac94..c92eedf414 100644 --- a/raphtory-core/src/storage/timeindex.rs +++ b/raphtory-core/src/storage/timeindex.rs @@ -308,7 +308,7 @@ where } fn range(&self, w: Range) -> Self { - let range = match self { + match self { TimeIndexWindow::Empty => TimeIndexWindow::Empty, TimeIndexWindow::Range { timeindex, range } => { let start = max(range.start, w.start); @@ -326,8 +326,7 @@ where timeindex: *timeindex, range: w, }, - }; - range + } } fn first(&self) -> Option { @@ -376,3 +375,29 @@ where } } } + +#[cfg(test)] +mod test { + use crate::{entities::properties::tcell::TCell, storage::timeindex::TimeIndexOps}; + use raphtory_api::core::storage::timeindex::EventTime; + + #[test] + fn window_of_window_not_empty() { + let mut cell: TCell<()> = TCell::default(); + cell.set(EventTime::new(1, 0), ()); + cell.set(EventTime::new(2, 0), ()); + cell.set(EventTime::new(3, 0), ()); + cell.set(EventTime::new(4, 0), ()); + cell.set(EventTime::new(8, 0), ()); + + assert_eq!(cell.iter_t().count(), 5); + + let cell_ref = &cell; + let window = EventTime::new(1, 0)..EventTime::new(8, 0); + let w = TimeIndexOps::range(&cell_ref, window.clone()); + assert_eq!(w.clone().iter_t().count(), 4); + + let w = TimeIndexOps::range(&w, window.clone()); + assert_eq!(w.iter_t().count(), 4); + } +} diff --git a/raphtory-core/src/utils/iter.rs b/raphtory-core/src/utils/iter.rs index 1c49f05c5a..73e2f7baa5 100644 --- a/raphtory-core/src/utils/iter.rs +++ b/raphtory-core/src/utils/iter.rs @@ -1,6 +1,7 @@ use ouroboros::self_referencing; pub use raphtory_api::iter::{BoxedLDIter, BoxedLIter}; +/// Iterator that returns elements from a locked object. #[self_referencing] pub struct GenLockedIter<'a, O, OUT> { owner: O, @@ -37,6 +38,7 @@ impl<'a, O, OUT> GenLockedIter<'a, O, OUT> { } } +/// Double-ended iterator that returns elements from a locked object. 
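
The doc comments just added describe self-referencing iterators built with ouroboros: the struct owns a lock guard and an iterator that borrows from that guard, so the lock is held for exactly as long as the iteration. A hedged, simplified sketch of the same pattern using a std `Mutex`; the struct, field names and guard type are illustrative, not `GenLockedIter`'s actual definition.

use ouroboros::self_referencing;
use std::sync::{Mutex, MutexGuard};

#[self_referencing]
struct LockedIter<'a> {
    guard: MutexGuard<'a, Vec<u32>>,
    #[borrows(guard)]
    #[covariant]
    iter: std::slice::Iter<'this, u32>,
}

impl<'a> Iterator for LockedIter<'a> {
    type Item = u32;
    fn next(&mut self) -> Option<u32> {
        // ouroboros only exposes the self-referencing field through closures
        self.with_iter_mut(|iter| iter.next().copied())
    }
}

fn main() {
    let data = Mutex::new(vec![1, 2, 3]);
    let total: u32 = LockedIterBuilder {
        guard: data.lock().unwrap(),
        iter_builder: |guard| guard.iter(), // borrows from the owned guard
    }
    .build()
    .sum(); // the lock is released when the iterator is dropped
    assert_eq!(total, 6);
}
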
#[self_referencing] pub struct GenLockedDIter<'a, O, OUT> { owner: O, diff --git a/raphtory-cypher/Cargo.toml b/raphtory-cypher/Cargo.toml index ecf235c9f6..f7917ab775 100644 --- a/raphtory-cypher/Cargo.toml +++ b/raphtory-cypher/Cargo.toml @@ -15,11 +15,10 @@ edition.workspace = true [dependencies] raphtory = { workspace = true } -pometry-storage = { workspace = true, optional = true } -arrow = { workspace = true } -arrow-buffer = { workspace = true } -arrow-schema = { workspace = true } -arrow-array = { workspace = true } +arrow.workspace = true +arrow-buffer.workspace = true +arrow-schema.workspace = true +arrow-array.workspace = true pest.workspace = true pest_derive.workspace = true @@ -42,6 +41,3 @@ pretty_assertions.workspace = true tempfile.workspace = true tokio.workspace = true clap.workspace = true - -[features] -storage = ["raphtory/storage", "dep:pometry-storage"] diff --git a/raphtory-graphql/Cargo.toml b/raphtory-graphql/Cargo.toml index e2d33f41cb..081ea82f76 100644 --- a/raphtory-graphql/Cargo.toml +++ b/raphtory-graphql/Cargo.toml @@ -15,9 +15,9 @@ homepage.workspace = true [dependencies] raphtory = { workspace = true, features = [ 'vectors', - 'search', "io", ] } +tempfile = { workspace = true } raphtory-api = { workspace = true } raphtory-storage = { workspace = true } base64 = { workspace = true } @@ -51,8 +51,9 @@ rustc-hash = { workspace = true } moka = { workspace = true } rayon = { workspace = true } ahash = { workspace = true } -strum = {workspace = true} -strum_macros = {workspace = true} +strum = { workspace = true } +strum_macros = { workspace = true } +bigdecimal = { workspace = true, features = ["serde"] } # python binding optional dependencies pyo3 = { workspace = true, optional = true } @@ -68,9 +69,9 @@ rust-embed = { workspace = true } parking_lot = { workspace = true } tempfile = { workspace = true } pretty_assertions = { workspace = true } +raphtory = { workspace = true, features = ["test-utils"] } arrow-array = { workspace = true } [features] -storage = ["raphtory/storage"] python = ["dep:pyo3", "raphtory/python"] search = ["raphtory/search"] diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index f096d609fb..c4ea2b45cc 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -42,7 +42,20 @@ type CollectionOfMetaGraph { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [MetaGraph!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [MetaGraph!]! """ Returns a count of collection objects. """ @@ -63,7 +76,20 @@ type CollectionOfNamespace { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Namespace!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Namespace!]! """ Returns a count of collection objects. 
""" @@ -84,7 +110,20 @@ type CollectionOfNamespacedItem { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [NamespacedItem!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [NamespacedItem!]! """ Returns a count of collection objects. """ @@ -128,25 +167,45 @@ type Edge { Errors if any of the layers do not exist. """ - layers(names: [String!]!): Edge! + layers( + """ + Layer names to include. + """ + names: [String!]! + ): Edge! """ Returns a view of Edge containing all layers except the excluded list of names. Errors if any of the layers do not exist. """ - excludeLayers(names: [String!]!): Edge! + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): Edge! """ Returns a view of Edge containing the specified layer. Errors if any of the layers do not exist. """ - layer(name: String!): Edge! + layer( + """ + Layer name to include. + """ + name: String! + ): Edge! """ Returns a view of Edge containing all layers except the excluded layer specified. Errors if any of the layers do not exist. """ - excludeLayer(name: String!): Edge! + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): Edge! """ Creates a WindowSet with the given window duration and optional step using a rolling window. @@ -158,7 +217,20 @@ type Edge { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): EdgeWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): EdgeWindowSet! """ Creates a WindowSet with the given step size using an expanding window. @@ -168,19 +240,45 @@ type Edge { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): EdgeWindowSet! + expanding( + """ + How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. 
`Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): EdgeWindowSet! """ Creates a view of the Edge including all events between the specified start (inclusive) and end (exclusive). For persistent graphs, any edge which exists at any point during the window will be included. You may want to restrict this to only edges that are present at the end of the window using the is_valid function. """ - window(start: TimeInput!, end: TimeInput!): Edge! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): Edge! """ Creates a view of the Edge including all events at a specified time. """ - at(time: TimeInput!): Edge! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): Edge! """ - Returns a view of the edge at the latest time of the graph. + View of this edge pinned to the graph's latest time — equivalent to + `at(graph.latestTime)`. The edge's properties and metadata show their + most recent values, and (for persistent graphs) validity is evaluated + at that instant. """ latest: Edge! """ @@ -188,7 +286,12 @@ type Edge { This is equivalent to before(time + 1) for Graph and at(time) for PersistentGraph. """ - snapshotAt(time: TimeInput!): Edge! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): Edge! """ Creates a view of the Edge including all events that are valid at the latest time. @@ -198,36 +301,82 @@ type Edge { """ Creates a view of the Edge including all events before a specified end (exclusive). """ - before(time: TimeInput!): Edge! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): Edge! """ Creates a view of the Edge including all events after a specified start (exclusive). """ - after(time: TimeInput!): Edge! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): Edge! """ Shrinks both the start and end of the window. """ - shrinkWindow(start: TimeInput!, end: TimeInput!): Edge! + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Edge! """ Set the start of the window. """ - shrinkStart(start: TimeInput!): Edge! + shrinkStart( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput! + ): Edge! """ Set the end of the window. """ - shrinkEnd(end: TimeInput!): Edge! + shrinkEnd( + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Edge! """ Takes a specified selection of views and applies them in given order. """ - applyViews(views: [EdgeViewCollection!]!): Edge! + applyViews( + """ + Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + """ + views: [EdgeViewCollection!]! + ): Edge! """ Returns the earliest time of an edge. """ earliestTime: EventTime! + """ + The timestamp of the first event in this edge's history (first update, first + deletion, or anything in between). Differs from `earliestTime` in that + `earliestTime` reports when the edge is first *valid*; `firstUpdate` reports + when its history actually begins. + """ firstUpdate: EventTime! """ Returns the latest time of an edge. """ latestTime: EventTime! 
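+  # A sketch of the rolling/expanding fields above (values illustrative): one
+  # Edge view per calendar day, counting the windows and sampling a page:
+  #   rolling(window: {duration: "1 day"}) { count page(limit: 5) { latestTime { datetime } } }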
+ """ + The timestamp of the last event in this edge's history (last update, last + deletion, or anything in between). Differs from `latestTime` in that + `latestTime` reports when the edge is last *valid*; `lastUpdate` reports + when its history actually ends. + """ lastUpdate: EventTime! """ Returns the time of an exploded edge. Errors on an unexploded edge. @@ -263,12 +412,11 @@ type Edge { """ nbr: Node! """ - Returns the id of the edge. - - Returns: - list[str]: + Returns the `[src, dst]` id pair of the edge. Each id is a `String` + for string-indexed graphs or a non-negative `Int` for integer-indexed + graphs. """ - id: [String!]! + id: [NodeId!]! """ Returns a view of the properties of the edge. """ @@ -333,18 +481,27 @@ type Edge { Returns: boolean """ isSelfLoop: Boolean! - filter(expr: EdgeFilter!): Edge! + """ + Apply an edge filter in place, returning an edge view whose properties / + metadata / history are restricted to the matching subset. + """ + filter( + """ + Composite edge filter (by property, layer, src/dst, etc.). + """ + expr: EdgeFilter! + ): Edge! } input EdgeAddition { """ - Source node. + Source node id (string or non-negative integer). """ - src: String! + src: NodeId! """ - Destination node. + Destination node id (string or non-negative integer). """ - dst: String! + dst: NodeId! """ Layer. """ @@ -537,6 +694,11 @@ input EdgeLayersExpr { expr: EdgeFilter! } +""" +Describes edges between a specific pair of node types — the property and +metadata keys seen on such edges, along with their observed value types. +One `EdgeSchema` per `(srcType, dstType)` pair per layer. +""" type EdgeSchema { """ Returns the type of source for these edges @@ -692,7 +854,16 @@ input EdgeWindowExpr { expr: EdgeFilter! } +""" +A lazy sequence of per-window views of a single edge, produced by +`edge.rolling` / `edge.expanding`. Each entry is the edge as it exists in +that window. +""" type EdgeWindowSet { + """ + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -701,10 +872,32 @@ type EdgeWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Edge!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Edge!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Edge!]! } +""" +A lazy collection of edges from a graph view. Supports the usual view +transforms (window, layer, filter, ...), plus edge-specific ones like +`explode` and `explodeLayers`, pagination, and sorting. +""" type Edges { """ Returns a collection containing only edges in the default edge layer. @@ -713,19 +906,39 @@ type Edges { """ Returns a collection containing only edges belonging to the listed layers. """ - layers(names: [String!]!): Edges! + layers( + """ + Layer names to include. + """ + names: [String!]! + ): Edges! """ Returns a collection containing edges belonging to all layers except the excluded list of layers. 
""" - excludeLayers(names: [String!]!): Edges! + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): Edges! """ Returns a collection containing edges belonging to the specified layer. """ - layer(name: String!): Edges! + layer( + """ + Layer name to include. + """ + name: String! + ): Edges! """ Returns a collection containing edges belonging to all layers except the excluded layer specified. """ - excludeLayer(name: String!): Edges! + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): Edges! """ Creates a WindowSet with the given window duration and optional step using a rolling window. A rolling window is a window that moves forward by step size at each iteration. @@ -737,7 +950,20 @@ type Edges { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): EdgesWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): EdgesWindowSet! """ Creates a WindowSet with the given step size using an expanding window. An expanding window is a window that grows by step size at each iteration. @@ -747,20 +973,51 @@ type Edges { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): EdgesWindowSet! + expanding( + """ + How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): EdgesWindowSet! """ Creates a view of the Edge including all events between the specified start (inclusive) and end (exclusive). """ - window(start: TimeInput!, end: TimeInput!): Edges! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): Edges! """ Creates a view of the Edge including all events at a specified time. """ - at(time: TimeInput!): Edges! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): Edges! + """ + View showing only the latest state of each edge (equivalent to `at(latestTime)`). + """ latest: Edges! 
""" Creates a view of the Edge including all events that are valid at time. This is equivalent to before(time + 1) for Graph and at(time) for PersistentGraph. """ - snapshotAt(time: TimeInput!): Edges! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): Edges! """ Creates a view of the Edge including all events that are valid at the latest time. This is equivalent to a no-op for Graph and latest() for PersistentGraph. """ @@ -768,29 +1025,65 @@ type Edges { """ Creates a view of the Edge including all events before a specified end (exclusive). """ - before(time: TimeInput!): Edges! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): Edges! """ Creates a view of the Edge including all events after a specified start (exclusive). """ - after(time: TimeInput!): Edges! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): Edges! """ Shrinks both the start and end of the window. """ - shrinkWindow(start: TimeInput!, end: TimeInput!): Edges! + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Edges! """ Set the start of the window. """ - shrinkStart(start: TimeInput!): Edges! + shrinkStart( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput! + ): Edges! """ Set the end of the window. """ - shrinkEnd(end: TimeInput!): Edges! + shrinkEnd( + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Edges! """ Takes a specified selection of views and applies them in order given. """ - applyViews(views: [EdgesViewCollection!]!): Edges! + applyViews( + """ + Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + """ + views: [EdgesViewCollection!]! + ): Edges! """ - Returns an edge object for each update within the original edge. + Expand each edge into one edge per update: if `A->B` has three updates, it + becomes three `A->B` entries each at a distinct timestamp. Use this to + iterate per-event rather than per-edge. """ explode: Edges! """ @@ -800,9 +1093,15 @@ type Edges { """ explodeLayers: Edges! """ - Specify a sort order from: source, destination, property, time. You can also reverse the ordering. + Sort the edges. Multiple criteria are applied lexicographically (ties + on the first key break to the second, etc.). """ - sorted(sortBys: [EdgeSortBy!]!): Edges! + sorted( + """ + Ordered list of sort keys. Each entry chooses exactly one of `src` / `dst` / `time` / `property`, with an optional `reverse: true` to flip order. + """ + sortBys: [EdgeSortBy!]! + ): Edges! """ Returns the start time of the window or none if there is no window. """ @@ -825,19 +1124,73 @@ type Edges { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Edge!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Edge!]! """ Returns a list of all objects in the current selection of the collection. 
You should filter the collection first then call list. """ list: [Edge!]! """ - Returns a filtered view that applies to list down the chain - """ - filter(expr: EdgeFilter!): Edges! + Narrow the collection to edges matching `expr`. The filter sticks to the + returned view — every subsequent traversal through these edges (their + properties, their endpoints' neighbours, etc.) continues to see the + filtered scope. + + Useful when you want one scoping rule to apply across the whole query. + E.g. restricting everything to a specific week: + + ```text + edges { filter(expr: {window: {start: 1234, end: 5678}}) { + list { src { neighbours { list { name } } } } # neighbours still windowed + } } + ``` + + Contrast with `select`, which applies here and is not carried through. """ - Returns filtered list of edges + filter( + """ + Composite edge filter (by property, layer, src/dst, etc.). + """ + expr: EdgeFilter! + ): Edges! """ - select(expr: EdgeFilter!): Edges! + Narrow the collection to edges matching `expr`, but only at this step — + subsequent traversals out of these edges see the unfiltered graph again. + + Useful when you want different scopes at different hops. E.g. Monday's + edges, then the neighbours of their endpoints on Tuesday, then *those* + neighbours on Wednesday: + + ```text + edges { select(expr: {window: {...monday...}}) { + list { src { select(expr: {window: {...tuesday...}}) { + neighbours { select(expr: {window: {...wednesday...}}) { + neighbours { list { name } } + } } + } } } + } } + ``` + + Contrast with `filter`, which persists the scope through subsequent ops. + """ + select( + """ + Composite edge filter (by property, layer, src/dst, etc.). + """ + expr: EdgeFilter! + ): Edges! } input EdgesViewCollection @oneOf { @@ -903,7 +1256,16 @@ input EdgesViewCollection @oneOf { edgeFilter: EdgeFilter } +""" +A lazy sequence of per-window edge collections, produced by +`edges.rolling` / `edges.expanding`. Each entry is an `Edges` collection +as it exists in that window. +""" type EdgesWindowSet { + """ + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -912,7 +1274,24 @@ type EdgesWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Edges!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Edges!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Edges!]! } @@ -951,50 +1330,113 @@ type EventTime { Refer to chrono::format::strftime for formatting specifiers and escape sequences. Raises an error if a time conversion fails. """ - datetime(formatString: String): String + datetime( + """ + Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted. 
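+ A sketch of typical usage (field path illustrative): `earliestTime { datetime(formatString: "%d %b %Y") }` renders e.g. `15 Jan 2024`.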
+ """ + formatString: String + ): String } +""" +A view of a Raphtory graph. Every field here returns either data from the +view or a derived view (`window`, `layer`, `at`, `filter`, ...) that you can +keep chaining. Views are cheap — they don't copy the underlying data. +""" type Graph { """ Returns the names of all layers in the graphview. + Distinct layer names observed in the current view — any layer that has at + least one edge event visible here. Excludes layers that exist elsewhere in + the graph but whose edges have been filtered out. """ uniqueLayers: [String!]! """ - Returns a view containing only the default layer. + View restricted to the default layer — where nodes and edges end up + when `addNode` / `addEdge` is called without a `layer` argument. + Useful for separating "unlayered" base-graph events from named-layer + ones. """ defaultLayer: Graph! """ - Returns a view containing all the specified layers. - """ - layers(names: [String!]!): Graph! - """ - Returns a view containing all layers except the specified excluded layers. - """ - excludeLayers(names: [String!]!): Graph! - """ - Returns a view containing the layer specified. - """ - layer(name: String!): Graph! - """ - Returns a view containing all layers except the specified excluded layer. - """ - excludeLayer(name: String!): Graph! - """ - Returns a subgraph of a specified set of nodes which contains only the edges that connect nodes of the subgraph to each other. - """ - subgraph(nodes: [String!]!): Graph! - """ - Returns a view of the graph that only includes valid edges. + View restricted to the named layers. Updates on any other layer are hidden; + if that leaves a node or edge with no updates left, it disappears from the + view. + """ + layers( + """ + Layer names to include. + """ + names: [String!]! + ): Graph! + """ + View with the named layers hidden. Updates on those layers are removed; if + that leaves a node or edge with no updates left, it disappears from the + view. + """ + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): Graph! + """ + View restricted to a single layer. Convenience form of + `layers(names: [name])` — updates on any other layer are hidden, and + entities with nothing left disappear. + """ + layer( + """ + Layer name to include. + """ + name: String! + ): Graph! + """ + View with one layer hidden. Convenience form of + `excludeLayers(names: [name])` — updates on that layer are removed, and + entities with nothing left disappear. + """ + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): Graph! + """ + View restricted to a chosen set of nodes and the edges between them. Edges + connecting a selected node to a non-selected node are hidden. + """ + subgraph( + """ + Node ids to keep. + """ + nodes: [NodeId!]! + ): Graph! + """ + View containing only valid edges — for persistent graphs this drops edges + whose most recent event is a deletion at the latest time of the current + view (a later re-addition would keep them). On event graphs this is a + no-op. """ valid: Graph! """ - Returns a subgraph filtered by the specified node types. + View restricted to nodes with the given node types. """ - subgraphNodeTypes(nodeTypes: [String!]!): Graph! + subgraphNodeTypes( + """ + Node types to include. + """ + nodeTypes: [String!]! + ): Graph! """ - Returns a subgraph containing all nodes except the specified excluded nodes. + View with a set of nodes removed (along with any edges touching them). """ - excludeNodes(nodes: [String!]!): Graph! 
+ excludeNodes( + """ + Node ids to exclude. + """ + nodes: [NodeId!]! + ): Graph! """ Creates a rolling window with the specified window size and an optional step. @@ -1006,7 +1448,20 @@ type Graph { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): GraphWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: <number>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <number>}` or `{duration: <string>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): GraphWindowSet! """ Creates an expanding window with the specified step size. @@ -1016,15 +1471,38 @@ type Graph { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): GraphWindowSet! + expanding( + """ + How much the window grows by on each step. Pass either `{epoch: <number>}` for a discrete number of milliseconds, or `{duration: <string>}` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): GraphWindowSet! """ Return a graph containing only the activity between start and end, by default raphtory stores times in milliseconds from the unix epoch. """ - window(start: TimeInput!, end: TimeInput!): Graph! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): Graph! """ Creates a view including all events at a specified time. """ - at(time: TimeInput!): Graph! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): Graph! """ Creates a view including all events at the latest time. """ @@ -1032,7 +1510,12 @@ type Graph { """ Create a view including all events that are valid at the specified time. """ - snapshotAt(time: TimeInput!): Graph! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): Graph! """ Create a view including all events that are valid at the latest time. """ @@ -1040,25 +1523,57 @@ type Graph { """ Create a view including all events before a specified end (exclusive). """ - before(time: TimeInput!): Graph! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): Graph! """ Create a view including all events after a specified start (exclusive). """ - after(time: TimeInput!): Graph! - """ - Shrink both the start and end of the window. 
- """ - shrinkWindow(start: TimeInput!, end: TimeInput!): Graph! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): Graph! + """ + Shrink both the start and end of the window. The new bounds are taken as the + intersection with the current window; this never widens the view. + """ + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if before the current start. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if after the current end. + """ + end: TimeInput! + ): Graph! """ Set the start of the window to the larger of the specified value or current start. """ - shrinkStart(start: TimeInput!): Graph! + shrinkStart( + """ + Proposed new start (TimeInput); has no effect if it would widen the window. + """ + start: TimeInput! + ): Graph! """ Set the end of the window to the smaller of the specified value or current end. """ - shrinkEnd(end: TimeInput!): Graph! + shrinkEnd( + """ + Proposed new end (TimeInput); has no effect if it would widen the window. + """ + end: TimeInput! + ): Graph! """ - Returns the timestamp for the creation of the graph. + Filesystem creation timestamp (epoch millis) of the graph's on-disk folder + — i.e. when this graph was first saved to the server, not when its earliest + event occurred. Use `earliestTime` for the latter. """ created: Int! """ @@ -1086,13 +1601,26 @@ type Graph { """ end: EventTime! """ - Returns the earliest time that any edge in this graph is valid. + The earliest time at which any edge in this graph is valid. + + * `includeNegative` — if false, edge events with a timestamp `< 0` are + skipped when computing the minimum. Defaults to true. """ - earliestEdgeTime(includeNegative: Boolean): EventTime! + earliestEdgeTime( + """ + If false, edge events with a timestamp `< 0` are skipped when computing the minimum. Defaults to true. + """ + includeNegative: Boolean + ): EventTime! """ - Returns the latest time that any edge in this graph is valid. + The latest time at which any edge in this graph is valid. """ - latestEdgeTime(includeNegative: Boolean): EventTime! + latestEdgeTime( + """ + If false, edge events with a timestamp `< 0` are skipped when computing the maximum. Defaults to true. + """ + includeNegative: Boolean + ): EventTime! """ Returns the number of edges in the graph. @@ -1111,29 +1639,74 @@ type Graph { """ countNodes: Int! """ - Returns true if the graph contains the specified node. - """ - hasNode(name: String!): Boolean! - """ - Returns true if the graph contains the specified edge. Edges are specified by providing a source and destination node id. You can restrict the search to a specified layer. - """ - hasEdge(src: String!, dst: String!, layer: String): Boolean! - """ - Gets the node with the specified id. - """ - node(name: String!): Node - """ - Gets (optionally a subset of) the nodes in the graph. - """ - nodes(select: NodeFilter): Nodes! - """ - Gets the edge with the specified source and destination nodes. - """ - edge(src: String!, dst: String!): Edge - """ - Gets the edges in the graph. - """ - edges(select: EdgeFilter): Edges! + Returns true if a node with the given id exists in this view. + """ + hasNode( + """ + Node id to look up. + """ + name: NodeId! + ): Boolean! + """ + Returns true if an edge exists between `src` and `dst` in this view, optionally + restricted to a single layer. + """ + hasEdge( + """ + Source node id. + """ + src: NodeId!, + """ + Destination node id. 
+ """ + dst: NodeId!, + """ + Optional; if provided, only checks whether the edge exists on this layer. If null or omitted, any layer counts. + """ + layer: String + ): Boolean! + """ + Look up a single node by id. Returns null if the node doesn't exist in this + view. + """ + node( + """ + Node id. + """ + name: NodeId! + ): Node + """ + All nodes in this view, optionally narrowed by a filter. + """ + nodes( + """ + Optional node filter (by name, property, type, etc.). If omitted, every node in the view is returned. + """ + select: NodeFilter + ): Nodes! + """ + Look up a single edge by its endpoint ids. Returns null if no edge exists + between `src` and `dst` in this view. + """ + edge( + """ + Source node id. + """ + src: NodeId!, + """ + Destination node id. + """ + dst: NodeId! + ): Edge + """ + All edges in this view, optionally narrowed by a filter. + """ + edges( + """ + Optional edge filter (by property, layer, src/dst, etc.). If omitted, every edge in the view is returned. + """ + select: EdgeFilter + ): Edges! """ Returns the properties of the graph. """ @@ -1158,36 +1731,116 @@ type Graph { Returns the graph schema. """ schema: GraphSchema! - algorithms: GraphAlgorithmPlugin! - sharedNeighbours(selectedNodes: [String!]!): [Node!]! """ - Export all nodes and edges from this graph view to another existing graph + Access registered graph algorithms (PageRank, shortest path, etc.) for this + graph view. The set of available algorithms is defined by the plugin registry + loaded at server startup. """ - exportTo(path: String!): Boolean! - filter(expr: GraphFilter): Graph! - filterNodes(expr: NodeFilter!): Graph! - filterEdges(expr: EdgeFilter!): Graph! + algorithms: GraphAlgorithmPlugin! + """ + Nodes that are neighbours of every node in `selectedNodes`. Returns the + intersection of each selected node's neighbour set (undirected). + """ + sharedNeighbours( + """ + Node ids whose common neighbours you want. Returns an empty list if `selectedNodes` is empty or any id does not exist. + """ + selectedNodes: [NodeId!]! + ): [Node!]! + """ + Copy all nodes and edges of the current graph view into another already- + existing graph stored on the server. The destination graph is preserved + — this only adds; it does not replace. + """ + exportTo( + """ + Destination graph path relative to the root namespace. + """ + path: String! + ): Boolean! + """ + Returns a filtered view of the graph. Applies a mixed node/edge filter + expression and narrows nodes, edges, and their properties to what matches. + """ + filter( + """ + Optional composite filter combining node, edge, property, and metadata conditions. If omitted, applies the identity filter (equivalent to no filtering). + """ + expr: GraphFilter + ): Graph! + """ + Returns a graph view restricted to nodes that match the given filter; edges + are kept only if both endpoints survive. + """ + filterNodes( + """ + Composite node filter (by name, property, type, etc.). + """ + expr: NodeFilter! + ): Graph! + """ + Returns a graph view restricted to edges that match the given filter. Nodes + remain in the view even if all their edges are filtered out. + """ + filterEdges( + """ + Composite edge filter (by property, layer, src/dst, etc.). + """ + expr: EdgeFilter! + ): Graph! """ (Experimental) Get index specification. """ getIndexSpec: IndexSpec! """ - (Experimental) Searches for nodes which match the given filter expression. - - Uses Tantivy's exact search. - """ - searchNodes(filter: NodeFilter!, limit: Int!, offset: Int!): [Node!]! 
- """ - (Experimental) Searches the index for edges which match the given filter expression. - - Uses Tantivy's exact search. - """ - searchEdges(filter: EdgeFilter!, limit: Int!, offset: Int!): [Edge!]! - """ - Returns the specified graph view or if none is specified returns the default view. - This allows you to specify multiple operations together. - """ - applyViews(views: [GraphViewCollection!]!): Graph! + (Experimental) Searches for nodes which match the given filter + expression. Uses Tantivy's exact search; requires the graph to have + been indexed. + """ + searchNodes( + """ + Composite node filter (by name, property, type, etc.). + """ + filter: NodeFilter!, + """ + Maximum number of nodes to return. + """ + limit: Int!, + """ + Number of matches to skip before returning results. + """ + offset: Int! + ): [Node!]! + """ + (Experimental) Searches the index for edges which match the given + filter expression. Uses Tantivy's exact search; requires the graph to + have been indexed. + """ + searchEdges( + """ + Composite edge filter (by property, layer, src/dst, etc.). + """ + filter: EdgeFilter!, + """ + Maximum number of edges to return. + """ + limit: Int!, + """ + Number of matches to skip before returning results. + """ + offset: Int! + ): [Edge!]! + """ + Apply a list of view operations in the given order and return the + resulting graph view. Lets callers compose multiple view transforms + (window, layer, filter, snapshot, ...) in a single call. + """ + applyViews( + """ + Ordered list of view operations; each entry is a one-of variant applied to the running result. + """ + views: [GraphViewCollection!]! + ): Graph! } type GraphAlgorithmPlugin { @@ -1327,7 +1980,7 @@ input GraphViewCollection @oneOf { """ Subgraph nodes. """ - subgraph: [String!] + subgraph: [NodeId!] """ Subgraph node types. """ @@ -1335,7 +1988,7 @@ input GraphViewCollection @oneOf { """ List of excluded nodes. """ - excludeNodes: [String!] + excludeNodes: [NodeId!] """ Valid state. """ @@ -1418,9 +2071,16 @@ input GraphWindowExpr { expr: GraphFilter } +""" +A lazy sequence of graph snapshots produced by `rolling` or `expanding`. +Each entry is a `Graph` at a different window over time. Iterate via +`list` / `page` (or count with `count`). Subsequent view ops apply +per-window. +""" type GraphWindowSet { """ - Returns the number of items. + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. """ count: Int! """ @@ -1430,7 +2090,24 @@ type GraphWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Graph!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Graph!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Graph!]! } @@ -1462,7 +2139,20 @@ type History { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [EventTime!]! + page( + """ + Maximum number of items to return on this page. 
+ """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [EventTime!]! """ Fetch one page of EventTime entries with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). @@ -1470,7 +2160,20 @@ type History { For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - pageRev(limit: Int!, offset: Int, pageIndex: Int): [EventTime!]! + pageRev( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [EventTime!]! """ Returns True if the history is empty. """ @@ -1490,14 +2193,22 @@ type History { Optionally, a format string can be passed to format the output. Defaults to RFC 3339 if not provided (e.g., "2023-12-25T10:30:45.123Z"). Refer to chrono::format::strftime for formatting specifiers and escape sequences. """ - datetimes(formatString: String): HistoryDateTime! + datetimes( + """ + Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted. + """ + formatString: String + ): HistoryDateTime! """ Returns a HistoryEventId object which accesses event ids of EventTime entries. They are used for ordering within the same timestamp. """ eventId: HistoryEventId! """ - Returns an Intervals object which calculates the intervals between consecutive EventTime timestamps. + Inter-event gap analysis for this history. The returned `Intervals` + object exposes each gap (in milliseconds) between consecutive events, + plus summary statistics — `min` / `max` / `mean` / `median` — and + paginated access via `list` / `listRev` / `page` / `pageRev`. """ intervals: Intervals! } @@ -1511,13 +2222,23 @@ type HistoryDateTime { If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError will be raised on time conversion error. Defaults to False. """ - list(filterBroken: Boolean): [String!]! + list( + """ + If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false. + """ + filterBroken: Boolean + ): [String!]! """ List all datetimes formatted as strings in reverse chronological order. If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError will be raised on time conversion error. Defaults to False. """ - listRev(filterBroken: Boolean): [String!]! + listRev( + """ + If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false. + """ + filterBroken: Boolean + ): [String!]! """ Fetch one page of datetimes formatted as string with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). @@ -1527,7 +2248,24 @@ type HistoryDateTime { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. 
""" - page(limit: Int!, offset: Int, pageIndex: Int, filterBroken: Boolean): [String!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int, + """ + If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false. + """ + filterBroken: Boolean + ): [String!]! """ Fetch one page of datetimes formatted as string in reverse chronological order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). @@ -1537,7 +2275,24 @@ type HistoryDateTime { For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - pageRev(limit: Int!, offset: Int, pageIndex: Int, filterBroken: Boolean): [String!]! + pageRev( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int, + """ + If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false. + """ + filterBroken: Boolean + ): [String!]! } """ @@ -1559,7 +2314,20 @@ type HistoryEventId { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! """ Fetch one page of event ids in reverse chronological order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). @@ -1567,7 +2335,20 @@ type HistoryEventId { For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - pageRev(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + pageRev( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! } """ @@ -1589,7 +2370,20 @@ type HistoryTimestamp { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! """ Fetch one page of timestamps in reverse order with a number of items up to a specified limit, optionally offset by a specified amount. 
The page_index sets the number of pages to skip (defaults to 0). @@ -1597,7 +2391,20 @@ type HistoryTimestamp { For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - pageRev(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + pageRev( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! } type IndexSpec { @@ -1632,13 +2439,13 @@ input IndexSpecInput { input InputEdge { """ - Source node. + Source node id (string or non-negative integer). """ - src: String! + src: NodeId! """ - Destination node. + Destination node id (string or non-negative integer). """ - dst: String! + dst: NodeId! } """ @@ -1660,7 +2467,20 @@ type Intervals { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! """ Fetch one page of intervals between consecutive timestamps in reverse order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). @@ -1668,7 +2488,20 @@ type Intervals { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - pageRev(limit: Int!, offset: Int, pageIndex: Int): [Int!]! + pageRev( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Int!]! """ Compute the mean interval between consecutive timestamps. Returns None if fewer than 1 timestamp. """ @@ -1687,6 +2520,10 @@ type Intervals { min: Int } +""" +Describes a single edge layer — its name and the per `(srcType, dstType)` +edge schemas observed within it. +""" type LayerSchema { """ Returns the name of the layer with this schema @@ -1698,6 +2535,12 @@ type LayerSchema { edges: [EdgeSchema!]! } +""" +Lightweight summary of a stored graph — its name, path, counts, and +filesystem timestamps — served without deserializing the full graph. +Useful for listing what's available on the server before committing to a +full load. +""" type MetaGraph { """ Returns the graph name. @@ -1736,23 +2579,45 @@ type MetaGraph { metadata: [Property!]! } +""" +Constant key/value metadata attached to an entity (node, edge, or graph). +Metadata has no timeline — each key maps to exactly one value for the +lifetime of the entity. Separate from `Properties`, which carries +time-varying data. +""" type Metadata { """ - Get metadata value matching the specified key. + Look up a single metadata value by key. Returns null if no metadata with that + key exists. """ - get(key: String!): Property + get( + """ + The metadata name. + """ + key: String! + ): Property """ - /// Check if the key is in the metadata. 
+ Returns true if a metadata entry with the given key exists. """ - contains(key: String!): Boolean! + contains( + """ + The metadata name to look up. + """ + key: String! + ): Boolean! """ - Return all metadata keys. + All metadata keys present on this entity. """ keys: [String!]! """ - /// Return all metadata values. + All metadata values as `{key, value}` entries. """ - values(keys: [String!]): [Property!]! + values( + """ + Optional whitelist. If provided, only metadata with these keys is returned; if omitted, every metadata entry is returned. + """ + keys: [String!] + ): [Property!]! } type MutRoot { @@ -1761,54 +2626,152 @@ type MutRoot { """ plugins: MutationPlugin! """ - Delete graph from a path on the server. - """ - deleteGraph(path: String!): Boolean! - """ - Creates a new graph. - """ - newGraph(path: String!, graphType: GraphType!): Boolean! - """ - Move graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. - """ - moveGraph(path: String!, newPath: String!): Boolean! - """ - Copy graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. - """ - copyGraph(path: String!, newPath: String!): Boolean! - """ - Upload a graph file from a path on the client using GQL multipart uploading. - - Returns:: - name of the new graph - """ - uploadGraph(path: String!, graph: Upload!, overwrite: Boolean!): String! - """ - Send graph bincode as base64 encoded string. - - Returns:: - path of the new graph - """ - sendGraph(path: String!, graph: String!, overwrite: Boolean!): String! - """ - Returns a subgraph given a set of nodes from an existing graph in the server. - - Returns:: - name of the new graph - """ - createSubgraph(parentPath: String!, nodes: [String!]!, newPath: String!, overwrite: Boolean!): String! - """ - (Experimental) Creates search index. - """ - createIndex(path: String!, indexSpec: IndexSpecInput, inRam: Boolean!): Boolean! + Permanently delete a stored graph from the server. + Requires WRITE on the graph and on its parent namespace. + """ + deleteGraph( + """ + Graph path relative to the root namespace. + """ + path: String! + ): Boolean! + """ + Create a new empty graph at the given path. Errors if a graph already + exists there. + Requires WRITE on the parent namespace. + """ + newGraph( + """ + Destination path relative to the root namespace. + """ + path: String!, graphType: GraphType! + ): Boolean! + """ + Move a stored graph to a new path on the server (rename / relocate). + Atomic: copies first, then deletes the source. + Requires WRITE on the source graph and on both the source and + destination namespaces. + """ + moveGraph( + """ + Current graph path relative to the root namespace. + """ + path: String!, + """ + Destination path relative to the root namespace. + """ + newPath: String!, + """ + If true, allow replacing an existing graph at `newPath`; defaults to false. + """ + overwrite: Boolean + ): Boolean! + """ + Duplicate a stored graph to a new path on the server. Source is + preserved. + Requires READ on the source graph and WRITE on the destination namespace. + """ + copyGraph( + """ + Source graph path relative to the root namespace. + """ + path: String!, + """ + Destination path relative to the root namespace. 
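+ For example `team/project/graph-copy` (illustrative).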
+ """ + newPath: String!, + """ + If true, allow replacing an existing graph at `newPath`; defaults to false. + """ + overwrite: Boolean + ): Boolean! + """ + Stream-upload a graph file using GraphQL multipart upload. The client + sends the file directly; the server stores it under `path`. + Requires WRITE on the destination namespace. + """ + uploadGraph( + """ + Destination path relative to the root namespace. + """ + path: String!, + """ + Multipart upload of the serialised graph file. + """ + graph: Upload!, + """ + If true, replace any graph already at `path`. + """ + overwrite: Boolean! + ): String! + """ + Send a serialised graph as a base64-encoded string in the request + body. Use for smaller graphs where multipart upload is overkill. + Requires WRITE on the destination namespace. + """ + sendGraph( + """ + Destination path relative to the root namespace. + """ + path: String!, + """ + Base64-encoded bincode of the serialised graph. + """ + graph: String!, + """ + If true, replace any graph already at `path`. + """ + overwrite: Boolean! + ): String! + """ + Persist a subgraph of an existing stored graph as a new graph. The + subgraph contains only the listed nodes and edges between them. + Requires READ on the parent graph and WRITE on the destination namespace. + """ + createSubgraph( + """ + Source graph path relative to the root namespace. + """ + parentPath: String!, + """ + Node ids to include in the subgraph. + """ + nodes: [NodeId!]!, + """ + Destination path relative to the root namespace. + """ + newPath: String!, + """ + If true, replace any graph already at `newPath`. + """ + overwrite: Boolean! + ): String! + """ + (Experimental) Build a Tantivy search index for a stored graph so it + can be queried via `searchNodes` / `searchEdges`. + Requires WRITE on the graph. + """ + createIndex( + """ + Graph path relative to the root namespace. + """ + path: String!, + """ + Optional spec selecting which node/edge property fields to index. Omit to index a default set. + """ + indexSpec: IndexSpecInput, + """ + If true, build the index in memory (faster but lost on restart). If false, persist to disk. + """ + inRam: Boolean! + ): Boolean! } +""" +Write-side handle for a single edge — returned from `addEdge` or +`MutableGraph.edge`. Supports adding updates, deletions, and attaching +or updating metadata. +""" type MutableEdge { """ Use to check if adding the edge was successful. @@ -1827,83 +2790,283 @@ type MutableEdge { """ dst: MutableNode! """ - Mark the edge as deleted at time time. - """ - delete(time: Int!, layer: String): Boolean! - """ - Add metadata to the edge (errors if the value already exists). - - If this is called after add_edge, the layer is inherited from the add_edge and does not - need to be specified again. - """ - addMetadata(properties: [PropertyInput!]!, layer: String): Boolean! - """ - Update metadata of the edge (existing values are overwritten). - - If this is called after add_edge, the layer is inherited from the add_edge and does not - need to be specified again. - """ - updateMetadata(properties: [PropertyInput!]!, layer: String): Boolean! - """ - Add temporal property updates to the edge. - - If this is called after add_edge, the layer is inherited from the add_edge and does not - need to be specified again. - """ - addUpdates(time: Int!, properties: [PropertyInput!], layer: String): Boolean! + Mark this edge as deleted at the given time. 
Persistent graphs treat this + as a tombstone (the edge becomes invalid from `time` onwards); event + graphs simply log the deletion event. + """ + delete( + """ + Time of the deletion. + """ + time: TimeInput!, + """ + Optional layer name. If omitted, uses the layer the edge was originally added on (when called after `addEdge`). + """ + layer: String + ): Boolean! + """ + Add metadata to this edge. Errors if any of the keys already exists — + use `updateMetadata` to overwrite. If this is called after `addEdge`, + the layer is inherited and does not need to be specified again. + """ + addMetadata( + """ + List of `{key, value}` pairs to set as metadata. + """ + properties: [PropertyInput!]!, + """ + Optional layer name; defaults to the inherited layer. + """ + layer: String + ): Boolean! + """ + Update metadata of this edge, overwriting any existing values for the + given keys. If this is called after `addEdge`, the layer is inherited + and does not need to be specified again. + """ + updateMetadata( + """ + List of `{key, value}` pairs to upsert. + """ + properties: [PropertyInput!]!, + """ + Optional layer name; defaults to the inherited layer. + """ + layer: String + ): Boolean! + """ + Append a property update to this edge at a specific time. If called + after `addEdge`, the layer is inherited and does not need to be + specified again. + """ + addUpdates( + """ + Time of the update. + """ + time: TimeInput!, + """ + Optional `{key, value}` pairs attached to the event. + """ + properties: [PropertyInput!], + """ + Optional layer name; defaults to the inherited layer. + """ + layer: String + ): Boolean! } +""" +Write-enabled handle for a graph. Obtained by calling `updateGraph(path)` +on the root query with a path you have write permission for. Supports +adding nodes and edges (individually or in batches), attaching +properties/metadata, and looking up mutable `node`/`edge` handles. Use the +read-only `graph(path)` resolver for queries. +""" type MutableGraph { """ - Get the non-mutable graph. - """ - graph: Graph! - """ - Get mutable existing node. - """ - node(name: String!): MutableNode - """ - Add a new node or add updates to an existing node. - """ - addNode(time: Int!, name: String!, properties: [PropertyInput!], nodeType: String): MutableNode! - """ - Create a new node or fail if it already exists. - """ - createNode(time: Int!, name: String!, properties: [PropertyInput!], nodeType: String): MutableNode! - """ - Add a batch of nodes. - """ - addNodes(nodes: [NodeAddition!]!): Boolean! - """ - Get a mutable existing edge. - """ - edge(src: String!, dst: String!): MutableEdge - """ - Add a new edge or add updates to an existing edge. - """ - addEdge(time: Int!, src: String!, dst: String!, properties: [PropertyInput!], layer: String): MutableEdge! - """ - Add a batch of edges. - """ - addEdges(edges: [EdgeAddition!]!): Boolean! - """ - Mark an edge as deleted (creates the edge if it did not exist). - """ - deleteEdge(time: Int!, src: String!, dst: String!, layer: String): MutableEdge! - """ - Add temporal properties to graph. - """ - addProperties(t: Int!, properties: [PropertyInput!]!): Boolean! - """ - Add metadata to graph (errors if the property already exists). - """ - addMetadata(properties: [PropertyInput!]!): Boolean! - """ - Update metadata of the graph (overwrites existing values). - """ - updateMetadata(properties: [PropertyInput!]!): Boolean! + Read-only view of this graph — identical to what you'd get from + `graph(path:)` on the query root. 
Use this when you want to compose + queries on the graph you've just mutated. `graphType` lets you + re-interpret the graph at query time (see `graph(path:)` for + semantics); defaults to the stored graph's native type. + """ + graph( + """ + Optional override for graph semantics — `EVENT` treats every update as a point-in-time event, `PERSISTENT` carries values forward until overwritten or deleted. Defaults to the stored graph's native type. + """ + graphType: GraphType + ): Graph! + """ + Look up an existing node for mutation. Returns null if the node doesn't + exist; use `addNode` or `createNode` to create one. + """ + node( + """ + Node id. + """ + name: NodeId! + ): MutableNode + """ + Add a new node or append an update to an existing one. Upsert semantics: + no error if the node already exists — properties and type are merged. + """ + addNode( + """ + Time of the event. + """ + time: TimeInput!, + """ + Node id. + """ + name: NodeId!, + """ + Optional property updates attached to this event. + """ + properties: [PropertyInput!], + """ + Optional node type to assign. If provided, sets the node's type at this event. + """ + nodeType: String, + """ + Optional layer name. If omitted, the default layer is used. + """ + layer: String + ): MutableNode! + """ + Create a new node or fail if it already exists. Strict alternative to + `addNode` — use this when you want to detect collisions. + """ + createNode( + """ + Time of the create event. + """ + time: TimeInput!, + """ + Node id. + """ + name: NodeId!, + """ + Optional property updates attached to this event. + """ + properties: [PropertyInput!], + """ + Optional node type to assign. If provided, sets the node's type at this event. + """ + nodeType: String, + """ + Optional layer name. If omitted, the default layer is used. + """ + layer: String + ): MutableNode! + """ + Batch-add multiple nodes in one call. For each `NodeAddition`, applies every + update it carries (time/properties pairs), then optionally sets its node type + and adds any metadata. On partial failure, returns a `BatchFailures` error + describing which entries failed and why; otherwise returns true. + """ + addNodes( + """ + List of `NodeAddition` inputs, each specifying a node's name, optional type, layer, per-timestamp updates, and metadata. + """ + nodes: [NodeAddition!]! + ): Boolean! + """ + Look up an existing edge for mutation. Returns null if no such edge exists. + """ + edge( + """ + Source node id. + """ + src: NodeId!, + """ + Destination node id. + """ + dst: NodeId! + ): MutableEdge + """ + Add a new edge or append an update to an existing one. Upsert semantics: + safe to call on an edge that already exists — creates missing endpoints if + needed. + """ + addEdge( + """ + Time of the event. + """ + time: TimeInput!, + """ + Source node id. + """ + src: NodeId!, + """ + Destination node id. + """ + dst: NodeId!, + """ + Optional property updates attached to this event. + """ + properties: [PropertyInput!], + """ + Optional layer name. If omitted, the default layer is used. + """ + layer: String + ): MutableEdge! + """ + Batch-add multiple edges in one call. For each `EdgeAddition`, applies every + update it carries, then adds any metadata. On partial failure, returns a + `BatchFailures` error describing which entries failed; otherwise returns + true. + """ + addEdges( + """ + List of `EdgeAddition` inputs, each specifying an edge's `src`, `dst`, optional layer, per-timestamp updates, and metadata. + """ + edges: [EdgeAddition!]! + ): Boolean! 
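+    # A minimal write-session sketch (hypothetical path, ids, times, and
+    # layer name): `updateGraph` lives on the query root, so mutable handles
+    # are reached through `query { ... }`. This upserts two nodes and the
+    # edge between them, reading back `success` on each handle:
+    #
+    #   query {
+    #     updateGraph(path: "team/demo") {
+    #       addNode(time: 1, name: "alice") { success }
+    #       addNode(time: 1, name: "bob") { success }
+    #       addEdge(time: 2, src: "alice", dst: "bob", layer: "follows") { success }
+    #     }
+    #   }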
+ """ + Mark an edge as deleted at the given time. Persistent graphs treat this + as a tombstone (the edge becomes invalid from `time` onwards); event + graphs simply log the deletion event. Creates the edge first if it did + not exist. + """ + deleteEdge( + """ + Time of the deletion. + """ + time: TimeInput!, + """ + Source node id. + """ + src: NodeId!, + """ + Destination node id. + """ + dst: NodeId!, + """ + Optional layer name. If omitted, the default layer is used. + """ + layer: String + ): MutableEdge! + """ + Add temporal properties to the graph itself (not a node or edge). Each + call records a property update at `t`. + """ + addProperties( + """ + Time of the update. + """ + t: TimeInput!, + """ + List of `{key, value}` pairs to set. + """ + properties: [PropertyInput!]! + ): Boolean! + """ + Add metadata to the graph itself. Errors if any of the keys already + exists — use `updateMetadata` to overwrite. + """ + addMetadata( + """ + List of `{key, value}` pairs to set as metadata. + """ + properties: [PropertyInput!]! + ): Boolean! + """ + Update metadata of the graph itself, overwriting any existing values for + the given keys. + """ + updateMetadata( + """ + List of `{key, value}` pairs to upsert. + """ + properties: [PropertyInput!]! + ): Boolean! } +""" +Write-side handle for a single node — returned from `addNode`, `createNode`, +or `MutableGraph.node`. Supports adding updates, setting node type, and +attaching or updating metadata. +""" type MutableNode { """ Use to check if adding the node was successful. @@ -1914,32 +3077,90 @@ type MutableNode { """ node: Node! """ - Add metadata to the node (errors if the property already exists). - """ - addMetadata(properties: [PropertyInput!]!): Boolean! - """ - Set the node type (errors if the node already has a non-default type). - """ - setNodeType(newType: String!): Boolean! - """ - Update metadata of the node (overwrites existing property values). - """ - updateMetadata(properties: [PropertyInput!]!): Boolean! - """ - Add temporal property updates to the node. - """ - addUpdates(time: Int!, properties: [PropertyInput!]): Boolean! + Add metadata to this node. Errors if any of the keys already exists — + use `updateMetadata` to overwrite. + """ + addMetadata( + """ + List of `{key, value}` pairs to set as metadata. + """ + properties: [PropertyInput!]! + ): Boolean! + """ + Set this node's type. Errors if the node already has a non-default + type and you're trying to change it. + """ + setNodeType( + """ + Node-type name to assign. + """ + newType: String! + ): Boolean! + """ + Update metadata of this node, overwriting any existing values for the + given keys. + """ + updateMetadata( + """ + List of `{key, value}` pairs to upsert. + """ + properties: [PropertyInput!]! + ): Boolean! + """ + Append a property update to this node at a specific time. + """ + addUpdates( + """ + Time of the update. + """ + time: TimeInput!, + """ + Optional `{key, value}` pairs attached to the event. + """ + properties: [PropertyInput!], + """ + Optional layer name. If omitted, the default layer is used. + """ + layer: String + ): Boolean! } type MutationPlugin { NoOps: String! } +""" +A directory-like container for graphs and nested namespaces. Graphs are +addressed by path (e.g. `"team/project/graph"`), and every segment except +the last is a namespace. Use to browse what's stored on the server without +loading any graph data. +""" type Namespace { + """ + Graphs directly inside this namespace (excludes graphs in nested + namespaces). 
Filtered by the caller's permissions — only graphs the + caller is allowed to see are returned. + """ graphs: CollectionOfMetaGraph! + """ + Path of this namespace relative to the root namespace. Empty string for + the root namespace itself. + """ path: String! + """ + Parent namespace, or null at the root. + """ parent: Namespace + """ + Sub-namespaces directly inside this one (one level down, not recursive). + Filtered by permissions. + """ children: CollectionOfNamespace! + """ + Everything in this namespace — sub-namespaces and graphs — as a single + heterogeneous collection. Sub-namespaces are listed before graphs. + Filtered by permissions. + """ items: CollectionOfNamespacedItem! } @@ -1950,9 +3171,10 @@ Raphtory graph node. """ type Node { """ - Returns the unique id of the node. + Returns the unique id of the node — `String` for string-indexed + graphs, non-negative `Int` for integer-indexed graphs. """ - id: String! + id: NodeId! """ Returns the name of the node. """ @@ -1964,19 +3186,39 @@ type Node { """ Return a view of node containing all layers specified. """ - layers(names: [String!]!): Node! + layers( + """ + Layer names to include. + """ + names: [String!]! + ): Node! """ Returns a collection containing nodes belonging to all layers except the excluded list of layers. """ - excludeLayers(names: [String!]!): Node! + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): Node! """ Returns a collection containing nodes belonging to the specified layer. """ - layer(name: String!): Node! + layer( + """ + Layer name to include. + """ + name: String! + ): Node! """ Returns a collection containing nodes belonging to all layers except the excluded layer. """ - excludeLayer(name: String!): Node! + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): Node! """ Creates a WindowSet with the specified window size and optional step using a rolling window. @@ -1988,7 +3230,20 @@ type Node { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): NodeWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): NodeWindowSet! """ Creates a WindowSet with the specified step size using an expanding window. @@ -1998,15 +3253,38 @@ type Node { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): NodeWindowSet! 
+ expanding( + """ + How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): NodeWindowSet! """ Create a view of the node including all events between the specified start (inclusive) and end (exclusive). """ - window(start: TimeInput!, end: TimeInput!): Node! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): Node! """ Create a view of the node including all events at a specified time. """ - at(time: TimeInput!): Node! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): Node! """ Create a view of the node including all events at the latest time. """ @@ -2014,7 +3292,12 @@ type Node { """ Create a view of the node including all events that are valid at the specified time. """ - snapshotAt(time: TimeInput!): Node! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): Node! """ Create a view of the node including all events that are valid at the latest time. """ @@ -2022,23 +3305,52 @@ type Node { """ Create a view of the node including all events before specified end time (exclusive). """ - before(time: TimeInput!): Node! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): Node! """ Create a view of the node including all events after the specified start time (exclusive). """ - after(time: TimeInput!): Node! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): Node! """ Shrink a Window to a specified start and end time, if these are earlier and later than the current start and end respectively. """ - shrinkWindow(start: TimeInput!, end: TimeInput!): Node! + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Node! """ Set the start of the window to the larger of a specified start time and self.start(). """ - shrinkStart(start: TimeInput!): Node! + shrinkStart( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput! + ): Node! """ Set the end of the window to the smaller of a specified end and self.end(). """ - shrinkEnd(end: TimeInput!): Node! + shrinkEnd( + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Node! applyViews(views: [NodeViewCollection!]!): Node! """ Returns the earliest time that the node exists. @@ -2131,9 +3443,9 @@ type Node { input NodeAddition { """ - Name. + Node id (string or non-negative integer). """ - name: String! + name: NodeId! """ Node type. """ @@ -2146,6 +3458,10 @@ input NodeAddition { Updates. """ updates: [TemporalPropertyInput!] + """ + Layer. + """ + layer: String } enum NodeField { @@ -2340,6 +3656,13 @@ input NodeFilter @oneOf { isActive: Boolean } +""" +Identifier for a node — either a string (`"alice"`) or a non-negative +integer (`42`). Use whichever form matches how the graph was indexed +when nodes were added. 
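+
+A minimal lookup sketch (hypothetical graph path and ids; which form
+resolves depends on how this particular graph was indexed):
+
+```text
+graph(path: "team/demo") {
+  byName: node(name: "alice") { id }
+  byInt: node(name: 42) { id }
+}
+```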
+""" +scalar NodeId + """ Restricts node evaluation to one or more layers and applies a nested `NodeFilter`. @@ -2356,12 +3679,27 @@ input NodeLayersExpr { expr: NodeFilter! } +""" +Describes nodes of a specific type in a graph — its property keys and +observed value types (and, for string-valued properties, the set of +distinct values seen). One `NodeSchema` per node type. +""" type NodeSchema { + """ + The node type this schema describes (e.g. `"person"`, `"org"`). + Falls back to the default node type for untyped nodes. + """ typeName: String! """ - Returns the list of property schemas for this node + Property schemas seen on nodes of this type — one entry per property key + ever set on a node of this type, with its observed `PropertyType` and (for + string-valued properties) the set of distinct values. """ properties: [PropertySchema!]! + """ + Metadata schemas seen on nodes of this type — like `properties`, but + covering metadata fields rather than temporal properties. + """ metadata: [PropertySchema!]! } @@ -2497,7 +3835,16 @@ input NodeWindowExpr { expr: NodeFilter! } +""" +A lazy sequence of per-window views of a single node, produced by +`node.rolling` / `node.expanding`. Each entry is the node as it exists in +that window. +""" type NodeWindowSet { + """ + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -2506,10 +3853,32 @@ type NodeWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Node!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Node!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Node!]! } +""" +A lazy collection of nodes from a graph view. Supports all the same view +transforms as `Graph` (window, layer, filter, ...) plus pagination and +sorting. Iterated via `list` / `page` / `ids` / `count`. +""" type Nodes { """ Return a view of the nodes containing only the default edge layer. @@ -2518,19 +3887,39 @@ type Nodes { """ Return a view of the nodes containing all layers specified. """ - layers(names: [String!]!): Nodes! + layers( + """ + Layer names to include. + """ + names: [String!]! + ): Nodes! """ Return a view of the nodes containing all layers except those specified. """ - excludeLayers(names: [String!]!): Nodes! + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): Nodes! """ Return a view of the nodes containing the specified layer. """ - layer(name: String!): Nodes! + layer( + """ + Layer name to include. + """ + name: String! + ): Nodes! """ Return a view of the nodes containing all layers except those specified. """ - excludeLayer(name: String!): Nodes! + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): Nodes! """ Creates a WindowSet with the specified window size and optional step using a rolling window. 
@@ -2542,7 +3931,20 @@ type Nodes { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): NodesWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): NodesWindowSet! """ Creates a WindowSet with the specified step size using an expanding window. @@ -2552,15 +3954,38 @@ type Nodes { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): NodesWindowSet! + expanding( + """ + How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): NodesWindowSet! """ Create a view of the node including all events between the specified start (inclusive) and end (exclusive). """ - window(start: TimeInput!, end: TimeInput!): Nodes! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): Nodes! """ Create a view of the nodes including all events at a specified time. """ - at(time: TimeInput!): Nodes! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): Nodes! """ Create a view of the nodes including all events at the latest time. """ @@ -2568,7 +3993,12 @@ type Nodes { """ Create a view of the nodes including all events that are valid at the specified time. """ - snapshotAt(time: TimeInput!): Nodes! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): Nodes! """ Create a view of the nodes including all events that are valid at the latest time. """ @@ -2576,29 +4006,82 @@ type Nodes { """ Create a view of the nodes including all events before specified end time (exclusive). """ - before(time: TimeInput!): Nodes! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): Nodes! """ Create a view of the nodes including all events after the specified start time (exclusive). """ - after(time: TimeInput!): Nodes! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): Nodes! """ Shrink both the start and end of the window. 
""" - shrinkWindow(start: TimeInput!, end: TimeInput!): Nodes! + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Nodes! """ Set the start of the window to the larger of a specified start time and self.start(). """ - shrinkStart(start: TimeInput!): Nodes! + shrinkStart( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput! + ): Nodes! """ Set the end of the window to the smaller of a specified end and self.end(). """ - shrinkEnd(end: TimeInput!): Nodes! + shrinkEnd( + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): Nodes! """ Filter nodes by node type. """ - typeFilter(nodeTypes: [String!]!): Nodes! - applyViews(views: [NodesViewCollection!]!): Nodes! - sorted(sortBys: [NodeSortBy!]!): Nodes! + typeFilter( + """ + Node-type names to keep. + """ + nodeTypes: [String!]! + ): Nodes! + """ + Apply a list of views in the given order and return the resulting nodes + collection. Lets callers compose window, layer, filter, and snapshot + operations in a single call. + """ + applyViews( + """ + Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, etc.) applied to the running result. + """ + views: [NodesViewCollection!]! + ): Nodes! + """ + Sort the nodes. Multiple criteria are applied lexicographically (ties on the + first key break to the second, etc.). + """ + sorted( + """ + Ordered list of sort keys. Each entry chooses exactly one of `id` / `time` / `property`, with an optional `reverse: true` to flip order. + """ + sortBys: [NodeSortBy!]! + ): Nodes! """ Returns the start time of the window. Errors if there is no window. """ @@ -2607,6 +4090,9 @@ type Nodes { Returns the end time of the window. Errors if there is no window. """ end: EventTime! + """ + Number of nodes in the current view. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -2615,20 +4101,78 @@ type Nodes { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Node!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Node!]! + """ + Materialise every node in the view. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Node!]! """ - Returns a view of the node ids. + Every node's id (name) as a flat list of strings. Rejected by the server when + bulk list endpoints are disabled. """ ids: [String!]! """ - Returns a filtered view that applies to list down the chain - """ - filter(expr: NodeFilter!): Nodes! + Narrow the collection to nodes matching `expr`. The filter sticks to the + returned view — every subsequent traversal through these nodes (their + neighbours, edges, properties) continues to see the filtered scope. + + Useful when you want one scoping rule to apply across the whole query. + E.g. 
restricting everything to a specific week: + + ```text + nodes { filter(expr: {window: {start: 1234, end: 5678}}) { + list { neighbours { list { name } } } # neighbours still windowed + } } + ``` + + Contrast with `select`, which applies here and is not carried through. """ - Returns filtered list of nodes + filter( + """ + Composite node filter (by name, property, type, etc.). + """ + expr: NodeFilter! + ): Nodes! """ - select(expr: NodeFilter!): Nodes! + Narrow the collection to nodes matching `expr`, but only at this step — + subsequent traversals out of these nodes see the unfiltered graph again. + + Useful when you want different scopes at different hops. E.g. nodes + active on Monday, then their neighbours active on Tuesday, then *those* + neighbours active on Wednesday: + + ```text + nodes { select(expr: {window: {...monday...}}) { + list { neighbours { select(expr: {window: {...tuesday...}}) { + list { neighbours { select(expr: {window: {...wednesday...}}) { + list { name } + } } } + } } } + } } + ``` + + Contrast with `filter`, which persists the scope through subsequent ops. + """ + select( + """ + Composite node filter (by name, property, type, etc.). + """ + expr: NodeFilter! + ): Nodes! } input NodesViewCollection @oneOf { @@ -2698,7 +4242,16 @@ input NodesViewCollection @oneOf { typeFilter: [String!] } +""" +A lazy sequence of per-window node collections, produced by +`nodes.rolling` / `nodes.expanding`. Each entry is a `Nodes` collection +as it exists in that window. +""" type NodesWindowSet { + """ + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -2707,7 +4260,24 @@ type NodesWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Nodes!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Nodes!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [Nodes!]! } @@ -2738,23 +4308,49 @@ type PagerankOutput { rank: Float! } +""" +A collection of nodes anchored to a source node — the result of traversals +like `node.neighbours`, `inNeighbours`, or `outNeighbours`. Supports all +the usual view transforms (window, layer, filter, ...) and can be chained +to walk further hops. +""" type PathFromNode { """ Returns a view of PathFromNode containing the specified layer, errors if the layer does not exist. """ - layers(names: [String!]!): PathFromNode! + layers( + """ + Layer names to include. + """ + names: [String!]! + ): PathFromNode! """ Return a view of PathFromNode containing all layers except the specified excluded layers, errors if any of the layers do not exist. """ - excludeLayers(names: [String!]!): PathFromNode! + excludeLayers( + """ + Layer names to exclude. + """ + names: [String!]! + ): PathFromNode! """ Return a view of PathFromNode containing the layer specified layer, errors if the layer does not exist. """ - layer(name: String!): PathFromNode! 
+ layer( + """ + Layer name to include. + """ + name: String! + ): PathFromNode! """ Return a view of PathFromNode containing all layers except the specified excluded layers, errors if any of the layers do not exist. """ - excludeLayer(name: String!): PathFromNode! + excludeLayer( + """ + Layer name to exclude. + """ + name: String! + ): PathFromNode! """ Creates a WindowSet with the given window size and optional step using a rolling window. @@ -2766,7 +4362,20 @@ type PathFromNode { Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before the start of the first window and/or after the end of the last window (i.e. not included in any window). """ - rolling(window: WindowDuration!, step: WindowDuration, alignmentUnit: AlignmentUnit): PathFromNodeWindowSet! + rolling( + """ + Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`). + """ + window: WindowDuration!, + """ + Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap. + """ + step: WindowDuration, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set). + """ + alignmentUnit: AlignmentUnit + ): PathFromNodeWindowSet! """ Creates a WindowSet with the given step size using an expanding window. @@ -2776,15 +4385,38 @@ type PathFromNode { If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. e.g. "1 month and 1 day" will align at the start of the day. """ - expanding(step: WindowDuration!, alignmentUnit: AlignmentUnit): PathFromNodeWindowSet! + expanding( + """ + How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`). + """ + step: WindowDuration!, + """ + Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`. + """ + alignmentUnit: AlignmentUnit + ): PathFromNodeWindowSet! """ Create a view of the PathFromNode including all events between a specified start (inclusive) and end (exclusive). """ - window(start: TimeInput!, end: TimeInput!): PathFromNode! + window( + """ + Inclusive lower bound. + """ + start: TimeInput!, + """ + Exclusive upper bound. + """ + end: TimeInput! + ): PathFromNode! """ Create a view of the PathFromNode including all events at time. """ - at(time: TimeInput!): PathFromNode! + at( + """ + Instant to pin the view to. + """ + time: TimeInput! + ): PathFromNode! """ Create a view of the PathFromNode including all events that are valid at the latest time. """ @@ -2792,7 +4424,12 @@ type PathFromNode { """ Create a view of the PathFromNode including all events that are valid at the specified time. """ - snapshotAt(time: TimeInput!): PathFromNode! + snapshotAt( + """ + Instant at which entities must be valid. + """ + time: TimeInput! + ): PathFromNode! 
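+    # A rolling-traversal sketch (hypothetical values; this assumes the
+    # `duration` variant of `WindowDuration` takes its value as a string):
+    # day-wide windows over a node's neighbours, paginated rather than
+    # materialised in full:
+    #
+    #   neighbours {
+    #     rolling(window: {duration: "1 day"}) {
+    #       page(limit: 7) { count }
+    #     }
+    #   }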
""" Create a view of the PathFromNode including all events at the latest time. """ @@ -2800,27 +4437,61 @@ type PathFromNode { """ Create a view of the PathFromNode including all events before the specified end (exclusive). """ - before(time: TimeInput!): PathFromNode! + before( + """ + Exclusive upper bound. + """ + time: TimeInput! + ): PathFromNode! """ Create a view of the PathFromNode including all events after the specified start (exclusive). """ - after(time: TimeInput!): PathFromNode! + after( + """ + Exclusive lower bound. + """ + time: TimeInput! + ): PathFromNode! """ Shrink both the start and end of the window. """ - shrinkWindow(start: TimeInput!, end: TimeInput!): PathFromNode! + shrinkWindow( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput!, + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): PathFromNode! """ Set the start of the window to the larger of the specified start and self.start(). """ - shrinkStart(start: TimeInput!): PathFromNode! + shrinkStart( + """ + Proposed new start (TimeInput); ignored if it would widen the window. + """ + start: TimeInput! + ): PathFromNode! """ Set the end of the window to the smaller of the specified end and self.end(). """ - shrinkEnd(end: TimeInput!): PathFromNode! + shrinkEnd( + """ + Proposed new end (TimeInput); ignored if it would widen the window. + """ + end: TimeInput! + ): PathFromNode! """ - Filter nodes by type. + Narrow this path to neighbours whose node type is in the given set. """ - typeFilter(nodeTypes: [String!]!): PathFromNode! + typeFilter( + """ + Node types to keep. + """ + nodeTypes: [String!]! + ): PathFromNode! """ Returns the earliest time that this PathFromNode is valid or None if the PathFromNode is valid for all times. """ @@ -2829,6 +4500,9 @@ type PathFromNode { Returns the latest time that this PathFromNode is valid or None if the PathFromNode is valid for all times. """ end: EventTime! + """ + Number of neighbour nodes reachable from the source in this view. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -2837,24 +4511,84 @@ type PathFromNode { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [Node!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [Node!]! + """ + Materialise every neighbour node in the path. Rejected by the server when + bulk list endpoints are disabled; use `page` for paginated access instead. + """ list: [Node!]! """ - Returns the node ids. + Every neighbour node's id (name) as a flat list of strings. Rejected by the + server when bulk list endpoints are disabled. """ ids: [String!]! """ Takes a specified selection of views and applies them in given order. """ - applyViews(views: [PathFromNodeViewCollection!]!): PathFromNode! + applyViews( + """ + Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result. + """ + views: [PathFromNodeViewCollection!]! + ): PathFromNode! 
""" - Returns a filtered view that applies to list down the chain - """ - filter(expr: NodeFilter!): PathFromNode! + Narrow the neighbour set to nodes matching `expr`. The filter sticks to + the returned path — every subsequent traversal (further hops, edges, + properties) continues to see the filtered scope. + + Useful when you want one scoping rule to apply across the whole query. + E.g. restricting the whole traversal to a specific week: + + ```text + node(name: "A") { neighbours { filter(expr: {window: {...week...}}) { + list { neighbours { list { name } } } # further hops still windowed + } } } + ``` + + Contrast with `select`, which applies here and is not carried through. """ - Returns filtered list of neighbour nodes + filter( + """ + Composite node filter (by name, property, type, etc.). + """ + expr: NodeFilter! + ): PathFromNode! """ - select(expr: NodeFilter!): PathFromNode! + Narrow the neighbour set to nodes matching `expr`, but only at this hop + — further traversals out of these nodes see the unfiltered graph again. + + Useful when each hop needs a different scope. E.g. neighbours active on + Monday, then *their* neighbours active on Tuesday: + + ```text + node(name: "A") { neighbours { select(expr: {window: {...monday...}}) { + list { neighbours { select(expr: {window: {...tuesday...}}) { + list { name } + } } } + } } } + ``` + + Contrast with `filter`, which persists the scope through subsequent ops. + """ + select( + """ + Composite node filter (by name, property, type, etc.). + """ + expr: NodeFilter! + ): PathFromNode! } input PathFromNodeViewCollection @oneOf { @@ -2912,7 +4646,16 @@ input PathFromNodeViewCollection @oneOf { shrinkEnd: TimeInput } +""" +A lazy sequence of per-window neighbour sets, produced by +`neighbours.rolling` / `neighbours.expanding` (or the in/out variants). +Each entry is a `PathFromNode` scoped to that window. +""" type PathFromNodeWindowSet { + """ + Number of windows in this set. Materialising all windows is expensive for + large graphs — prefer `page` over `list` when iterating. + """ count: Int! """ Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. @@ -2921,7 +4664,24 @@ type PathFromNodeWindowSet { For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ - page(limit: Int!, offset: Int, pageIndex: Int): [PathFromNode!]! + page( + """ + Maximum number of items to return on this page. + """ + limit: Int!, + """ + Extra items to skip on top of `pageIndex` paging (default 0). + """ + offset: Int, + """ + Zero-based page number; multiplies `limit` to determine where to start (default 0). + """ + pageIndex: Int + ): [PathFromNode!]! + """ + Materialise every window as a list. Rejected by the server when bulk list + endpoints are disabled; use `page` for paginated access instead. + """ list: [PathFromNode!]! } @@ -3055,29 +4815,72 @@ input PropCondition @oneOf { len: PropCondition } +""" +All temporal properties of an entity (metadata is exposed separately). +Look up individual properties via `get` / `contains`, enumerate via +`keys` / `values`, or drop into `temporal` for time-aware accessors. +""" type Properties { """ - Get property value matching the specified key. + Look up a single property by key. Returns null if no property with that key + exists in the current view. """ - get(key: String!): Property + get( + """ + The property name. + """ + key: String! + ): Property """ - Check if the key is in the properties. 
+ Returns true if a property with the given key exists in this view. """ - contains(key: String!): Boolean! + contains( + """ + The property name to look up. + """ + key: String! + ): Boolean! """ - Return all property keys. + All property keys present in the current view. Does not include metadata + — metadata is exposed separately via the entity's `metadata` field. """ keys: [String!]! """ - Return all property values. + Snapshot of property values, one `{key, value}` entry per property. + """ + values( + """ + Optional whitelist. If provided, only properties with these keys are returned; if omitted or null, every property in the view is returned. + """ + keys: [String!] + ): [Property!]! + """ + The temporal-only view of these properties — excludes metadata (which has no + history) and lets you drill into per-key timelines and aggregates. """ - values(keys: [String!]): [Property!]! temporal: TemporalProperties! } +""" +A single `(key, value)` property reading at a point in the graph view. +The value is exposed both as a typed scalar (`value`) and as a +human-readable string (`asString`). +""" type Property { + """ + The property key (name). + """ key: String! + """ + The property value rendered as a human-readable string (e.g. `"10"`, `"hello"`, + `"2024-01-01T00:00:00Z"`). For programmatic access use `value`, which returns + a typed scalar. + """ asString: String! + """ + The property value as a typed `PropertyOutput` scalar — numbers come back as + numbers, booleans as booleans, strings as strings, etc. + """ value: PropertyOutput! } @@ -3128,9 +4931,25 @@ type PropertySchema { variants: [String!]! } +""" +A `(time, value)` pair — the output type of temporal-property accessors +that need to report *when* a value was observed (e.g. `min`, `max`, +`median`, `orderedDedupe`). +""" type PropertyTuple { + """ + The timestamp at which this value was recorded. + """ time: EventTime! + """ + The value rendered as a human-readable string. For programmatic access use + `value`, which returns a typed scalar. + """ asString: String! + """ + The value as a typed `PropertyOutput` scalar — numbers come back as numbers, + booleans as booleans, etc. + """ value: PropertyOutput! } @@ -3149,61 +4968,140 @@ type QueryPlugin { NoOps: String! } +""" +Top-level READ-only query root. Entry points for loading a graph +(`graph`, `graphMetadata`), browsing stored graphs (`namespaces`, +`namespace`, `root`), downloading a stored graph as a base64 blob +(`receiveGraph`), inspecting vectorised variants (`vectorisedGraph`), +and a few utility endpoints (`version`, `hello`, `plugins`). +""" type QueryRoot { """ - Hello world demo + Liveness check — returns a static "hello world" string. Useful for + smoke-testing that the GraphQL server is reachable. """ hello: String! """ - Returns a graph - """ - graph(path: String!): Graph! - """ - Update graph query, has side effects to update graph state - - Returns:: GqlMutableGraph - """ - updateGraph(path: String!): MutableGraph! - """ - Update graph query, has side effects to update graph state - - Returns:: GqlMutableGraph - """ - vectoriseGraph(path: String!, model: EmbeddingModel, nodes: Template, edges: Template): Boolean! - """ - Create vectorised graph in the format used for queries - - Returns:: GqlVectorisedGraph - """ - vectorisedGraph(path: String!): VectorisedGraph - """ - Returns all namespaces using recursive search - - Returns:: List of namespaces on root + Load a graph by path. Returns null if the graph doesn't exist or is + inaccessible. 
When a READ-scoped filter is attached to the caller's + permissions, that filter is applied before the graph is returned. + `graphType` lets you re-interpret the stored graph at query time — + e.g. read an event-stored graph through persistent semantics. Defaults + to the type the graph was created with. + Requires READ on the graph. + """ + graph( + """ + Graph path relative to the root namespace (e.g. `"master"` or `"team/project/graph"`). + """ + path: String!, + """ + Optional override for graph semantics — `EVENT` treats every update as a point-in-time event, `PERSISTENT` carries values forward until overwritten or deleted. Defaults to the stored graph's native type. + """ + graphType: GraphType + ): Graph + """ + Returns lightweight metadata for a graph (node/edge counts, + timestamps) without deserialising the full graph. Returns null if the + graph doesn't exist or is inaccessible. + Requires READ on the graph, or INTROSPECT on its parent namespace. + """ + graphMetadata( + """ + Graph path relative to the root namespace. + """ + path: String! + ): MetaGraph + """ + Open a graph for writing — returns a `MutableGraph` handle that can + add nodes/edges/properties/metadata. + Requires WRITE on the graph. + """ + updateGraph( + """ + Graph path relative to the root namespace. + """ + path: String! + ): MutableGraph! + """ + Compute and persist embeddings for the nodes and edges of a stored + graph so it can be queried via `vectorisedGraph`. + Requires WRITE access. + """ + vectoriseGraph( + """ + Graph path relative to the root namespace. + """ + path: String!, + """ + Optional embedding model; defaults to OpenAI's standard model. + """ + model: EmbeddingModel, + """ + Optional node-document template (which fields go into each node's text representation); defaults to the built-in template. + """ + nodes: Template, + """ + Optional edge-document template; defaults to the built-in template. + """ + edges: Template + ): Boolean! + """ + Open a previously-vectorised graph for similarity queries. Returns null + if the graph has no embeddings (call `vectoriseGraph` first) or is + inaccessible. + Requires READ on the graph. + """ + vectorisedGraph( + """ + Graph path relative to the root namespace. + """ + path: String! + ): VectorisedGraph + """ + Recursively list every namespace under the root. Each namespace is + filtered against the caller's permissions: only namespaces with at + least DISCOVER are returned. """ namespaces: CollectionOfNamespace! """ - Returns a specific namespace at a given path - - Returns:: Namespace or error if no namespace found + Return a specific namespace by path. Errors if no namespace exists at + that path. + Requires INTROSPECT on the namespace to browse its contents. """ - namespace(path: String!): Namespace! + namespace( + """ + Namespace path relative to the root namespace (e.g. `"team/project"`). + """ + path: String! + ): Namespace! """ - Returns root namespace - - Returns:: Root namespace + Returns the root namespace. Use it as the entry point for browsing + namespaces and graphs — child listings filter against the caller's + permissions. """ root: Namespace! """ - Returns a plugin. + Entry point for READ-only plugins registered with the server (e.g. graph + algorithms exposed as queries). Available plugins are defined at server + startup via the plugin registry. """ plugins: QueryPlugin! """ - Encodes graph and returns as string - - Returns:: Base64 url safe encoded string + Encode a stored graph as a base64 string for client-side download. 
If + a READ-scoped filter is attached to the caller's permissions, only the + materialised filtered view is encoded. + Requires READ on the graph. + """ + receiveGraph( + """ + Graph path relative to the root namespace. + """ + path: String! + ): String! + """ + Version string of the running `raphtory-graphql` server build. """ - receiveGraph(path: String!): String! version: String! } @@ -3248,46 +5146,138 @@ input Template @oneOf { custom: String } +""" +The temporal-only view of an entity's properties. Each entry is a +`TemporalProperty` carrying the full timeline for that key — use this when +you need per-update iteration, time-indexed lookups, or aggregates. +""" type TemporalProperties { """ - Get property value matching the specified key. + Look up a single temporal property by key. Returns null if there's no temporal + property with that key. """ - get(key: String!): TemporalProperty + get( + """ + The property name. + """ + key: String! + ): TemporalProperty """ - Check if the key is in the properties. + Returns true if a temporal property with the given key exists. """ - contains(key: String!): Boolean! + contains( + """ + The property name to look up. + """ + key: String! + ): Boolean! """ - Return all property keys. + All temporal-property keys present in this view. """ keys: [String!]! """ - Return all property values. + All temporal properties, each as a `TemporalProperty` with its full timeline + available. Use `history`, `values`, `latest`, `at`, etc. on each entry. """ - values(keys: [String!]): [TemporalProperty!]! + values( + """ + Optional whitelist. If provided, only temporal properties with these keys are returned; if omitted, every temporal property in the view is returned. + """ + keys: [String!] + ): [TemporalProperty!]! } +""" +The full timeline of a single property key on one entity. Exposes every +update (via `values` / `history` / `orderedDedupe`), point lookups (`at`, +`latest`), and aggregates over the timeline (`sum`, `mean`, `min`, `max`, +`median`, `count`). +""" type TemporalProperty { """ - Key of a property. + The property key (name). """ key: String! + """ + Event history for this property — one entry per temporal update, in + insertion order. Use this to navigate the full timeline: access the + raw `timestamps` / `datetimes` / `eventId` lists, analyse gaps between + updates via `intervals` (mean/median/min/max), ask `isEmpty`, or + paginate the events. + """ history: History! """ - Return the values of the properties. + All values this property has ever taken, in temporal order (one per update). + Typed as `PropertyOutput` so numeric values stay numeric. + """ + values: [PropertyOutput!]! + """ + The value at or before time `t` (latest update on or before `t`). Returns null + if no update exists on or before `t`. + """ + at( + """ + A TimeInput (epoch millis integer, RFC3339 string, or `{timestamp, eventId}` object). + """ + t: TimeInput! + ): PropertyOutput + """ + The most recent value, or null if the property has never been set in this view. + """ + latest: PropertyOutput + """ + The set of distinct values this property has ever taken (order not guaranteed). + """ + unique: [PropertyOutput!]! + """ + Collapses runs of consecutive-equal updates into a single `(time, value)` pair. + """ + orderedDedupe( + """ + If true, each run is represented by its *last* timestamp; if false, by its *first*. Useful for compressing chatter in a timeline. + """ + latestTime: Boolean! + ): [PropertyTuple!]! + """ + Sum of all updates. 
Returns null if the dtype is not additive or the property is empty. + """ + sum: PropertyOutput + """ + Mean of all updates as an F64. Returns null if any value is non-numeric or the property is + empty. + """ + mean: PropertyOutput + """ + Alias for `mean` — same F64 average, same null cases. + """ + average: PropertyOutput + """ + Minimum `(time, value)` pair. Returns null if the dtype is not comparable or the property is + empty. + """ + min: PropertyTuple + """ + Maximum `(time, value)` pair. Returns null if the dtype is not comparable or the property is + empty. """ - values: [String!]! - at(t: TimeInput!): String - latest: String - unique: [String!]! - orderedDedupe(latestTime: Boolean!): [PropertyTuple!]! + max: PropertyTuple + """ + Median `(time, value)` pair (lower median on even-length inputs). Returns null if the dtype + is not comparable or the property is empty. + """ + median: PropertyTuple + """ + Number of updates recorded for this property in the current view. + """ + count: Int! } input TemporalPropertyInput { """ - Time. + Time of the update — accepts the same forms as `TimeInput` (epoch + millis Int, RFC3339 string, or `{timestamp, eventId}` object). """ - time: Int! + time: TimeInput! """ Properties. """ @@ -3299,6 +5289,11 @@ Input for primary time component. Expects Int, DateTime formatted String, or Obj where the timestamp is either an Int or a DateTime formatted String, and eventId is a non-negative Int. Valid string formats are RFC3339, RFC2822, %Y-%m-%d, %Y-%m-%dT%H:%M:%S%.3f, %Y-%m-%dT%H:%M:%S%, %Y-%m-%d %H:%M:%S%.3f and %Y-%m-%d %H:%M:%S%. + +Internally wraps `InputTime` so write paths (`addNode`, `addEdge`, +`addProperties`, etc.) can preserve auto-increment of `event_id` when only +a timestamp is given. Pass the object form `{timestamp, eventId}` to lock +the event_id explicitly. """ scalar TimeInput @@ -3356,8 +5351,26 @@ input Value @oneOf { Object. """ object: [ObjectEntry!] + """ + Timezone-aware datetime. + """ + dtime: String + """ + Naive datetime (no timezone). + """ + ndtime: String + """ + BigDecimal number (string representation, e.g. "3.14159" or "123e-5"). + """ + decimal: String } +""" +A working set of documents / nodes / edges built up via similarity +searches on a `VectorisedGraph`. Selections are mutable: you can grow +them with more hops (`expand*`), dereference the contents (`nodes`, +`edges`, `getDocuments`), or start fresh with `emptySelection`. +""" type VectorSelection { """ Returns a list of nodes in the current selection. @@ -3372,40 +5385,109 @@ type VectorSelection { """ getDocuments: [Document!]! """ - Adds all the documents associated with the specified nodes to the current selection. - - Documents added by this call are assumed to have a score of 0. - """ - addNodes(nodes: [String!]!): VectorSelection! - """ - Adds all the documents associated with the specified edges to the current selection. - - Documents added by this call are assumed to have a score of 0. - """ - addEdges(edges: [InputEdge!]!): VectorSelection! - """ - Add all the documents a specified number of hops away to the selection. - - Two documents A and B are considered to be 1 hop away of each other if they are on the same entity or if they are on the same node and edge pair. - """ - expand(hops: Int!, window: VectorisedGraphWindow): VectorSelection! - """ - Adds documents, from the set of one hop neighbours to the current selection, to the selection based on their similarity score with the specified query. 
This function loops so that the set of one hop neighbours expands on each loop and number of documents added is determined by the specified limit. - """ - expandEntitiesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! - """ - Add the adjacent nodes with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to nodes. - """ - expandNodesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! - """ - Add the adjacent edges with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to edges. - """ - expandEdgesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! + Add every document associated with the named nodes to the selection. + Documents added this way receive a score of 0 (no similarity ranking). + """ + addNodes( + """ + Node ids whose documents to include. + """ + nodes: [NodeId!]! + ): VectorSelection! + """ + Add every document associated with the named edges to the selection. + Documents added this way receive a score of 0 (no similarity ranking). + """ + addEdges( + """ + List of `{src, dst}` pairs identifying the edges. + """ + edges: [InputEdge!]! + ): VectorSelection! + """ + Grow the selection by including documents that are within `hops` of any + document already in the selection. Two documents are 1 hop apart if + they're on the same entity or on a connected node/edge pair. + """ + expand( + """ + Number of expansion rounds (1 = direct neighbours). + """ + hops: Int!, + """ + Optional `{start, end}` to restrict expansion to entities active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! + """ + Iteratively expand the selection by similarity to a natural-language + query. Each pass takes the one-hop neighbour set of the current + selection and adds the highest-scoring entities (mixed nodes and + edges); the loop continues until `limit` entities have been added. + """ + expandEntitiesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Total number of entities to add across all passes. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to entities active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! + """ + Like `expandEntitiesBySimilarity` but restricted to nodes — iteratively + add the highest-scoring adjacent nodes to the selection. + """ + expandNodesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Total number of nodes to add across all passes. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to nodes active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! + """ + Like `expandEntitiesBySimilarity` but restricted to edges — iteratively + add the highest-scoring adjacent edges to the selection. + """ + expandEdgesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Total number of edges to add across all passes. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to edges active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! } +""" +A graph with embedded vector representations for its nodes and edges. 
+Exposes similarity search over documents, nodes, and edges, plus +selection building (`emptySelection`) and index maintenance +(`optimizeIndex`). +""" type VectorisedGraph { """ - Optmize the vector index + Rebuild (or incrementally update) the on-disk vector indexes for nodes + and edges so subsequent similarity searches hit the fresh embeddings. + Safe to call repeatedly; returns true on success. """ optimizeIndex: Boolean! """ @@ -3413,28 +5495,73 @@ type VectorisedGraph { """ emptySelection: VectorSelection! """ - Search the top scoring entities according to a specified query returning no more than a specified limit of entities. - """ - entitiesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! - """ - Search the top scoring nodes according to a specified query returning no more than a specified limit of nodes. - """ - nodesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! - """ - Search the top scoring edges according to a specified query returning no more than a specified limit of edges. - """ - edgesBySimilarity(query: String!, limit: Int!, window: VectorisedGraphWindow): VectorSelection! + Find the highest-scoring nodes *and* edges (mixed) by similarity to a + natural-language query. The query is embedded server-side and matched + against indexed entity vectors. + """ + entitiesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Maximum number of results to return. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to entities active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! + """ + Find the highest-scoring nodes by similarity to a natural-language + query. The query is embedded server-side and matched against indexed + node vectors. + """ + nodesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Maximum number of nodes to return. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to nodes active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! + """ + Find the highest-scoring edges by similarity to a natural-language + query. The query is embedded server-side and matched against indexed + edge vectors. + """ + edgesBySimilarity( + """ + Natural-language search string; embedded by the server. + """ + query: String!, + """ + Maximum number of edges to return. + """ + limit: Int!, + """ + Optional `{start, end}` to restrict matches to edges active in that interval. + """ + window: VectorisedGraphWindow + ): VectorSelection! } input VectorisedGraphWindow { """ - Start time. + Inclusive lower bound of the search window. """ - start: Int! + start: TimeInput! """ - End time. + Exclusive upper bound of the search window. """ - end: Int! + end: TimeInput! 
}

input Window {

diff --git a/raphtory-graphql/src/auth.rs b/raphtory-graphql/src/auth.rs
index 1626bf38a3..bae5a83bd2 100644
--- a/raphtory-graphql/src/auth.rs
+++ b/raphtory-graphql/src/auth.rs
@@ -1,4 +1,4 @@
-use crate::config::auth_config::{AuthConfig, PublicKey};
+use crate::config::{app_config::AppConfig, auth_config::PublicKey};
 use async_graphql::{
     async_trait,
     extensions::{Extension, ExtensionContext, ExtensionFactory, NextParseQuery},
@@ -10,59 +10,55 @@ use async_graphql_poem::{GraphQLBatchRequest, GraphQLBatchResponse, GraphQLReque
 use futures_util::StreamExt;
 use jsonwebtoken::{decode, Algorithm, Validation};
 use poem::{
-    error::{TooManyRequests, Unauthorized},
+    error::{BadRequest, TooManyRequests, Unauthorized},
     Body, Endpoint, FromRequest, IntoResponse, Request, Response, Result,
 };
 use reqwest::header::AUTHORIZATION;
 use serde::Deserialize;
 use std::{sync::Arc, time::Duration};
 use tokio::sync::{RwLock, Semaphore};
+use tracing::{debug, warn};

 #[derive(Clone, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
-pub(crate) enum Access {
+pub enum Access {
     Ro,
     Rw,
 }

 #[derive(Deserialize, Debug, Clone)]
 pub(crate) struct TokenClaims {
-    pub(crate) a: Access,
+    pub(crate) access: Access,
+    #[serde(default)]
+    pub(crate) role: Option<String>,
 }

 // TODO: maybe this should be renamed as it doens't only take care of auth anymore
 pub struct AuthenticatedGraphQL<E> {
     executor: E,
-    config: AuthConfig,
+    config: AppConfig,
     semaphore: Option<Semaphore>,
-    lock: Option<RwLock<()>>,
+    lock: Option<RwLock<()>>,
 }

 impl<E> AuthenticatedGraphQL<E> {
     /// Create a GraphQL endpoint.
-    pub fn new(executor: E, config: AuthConfig) -> Self {
+    pub fn new(executor: E, config: AppConfig) -> Self {
+        let semaphore = config.concurrency.heavy_query_limit.map(|limit| {
+            println!("Server running with concurrency limited to {limit} for heavy queries");
+            Semaphore::new(limit)
+        });
+        let lock = if config.concurrency.exclusive_writes {
+            println!("Server running with exclusive writes");
+            Some(RwLock::new(()))
+        } else {
+            None
+        };
         Self {
             executor,
             config,
-            semaphore: std::env::var("RAPHTORY_CONCURRENCY_LIMIT")
-                .ok()
-                .and_then(|limit| {
-                    let limit = limit.parse::<usize>().ok()?;
-                    println!(
-                        "Server running with concurrency limited to {limit} for heavy queries"
-                    );
-                    Some(Semaphore::new(limit))
-                }),
-            lock: std::env::var("RAPHTORY_THREADSAFE")
-                .ok()
-                .and_then(|thread_safe| {
-                    if thread_safe == "1" {
-                        println!("Server running in threadsafe mode");
-                        Some(RwLock::new(()))
-                    } else {
-                        None
-                    }
-                }),
+            semaphore,
+            lock,
         }
     }
 }
@@ -103,6 +99,10 @@ pub enum AuthError {
     RequireRead,
     #[error("The requested endpoint requires write access")]
     RequireWrite,
+    #[error("Query batching is disabled on this server")]
+    BatchingDisabled,
+    #[error("Batch size {actual} exceeds the maximum allowed {max}")]
+    BatchSizeExceeded { max: usize, actual: usize },
 }

 impl From<AuthError> for ServerError {
@@ -124,23 +124,28 @@ where
     async fn call(&self, req: Request) -> Result<Response> {
         // here ANY error when trying to validate the Authorization header is equivalent to it not being present at all
-        let access = match &self.config.public_key {
+        let (access, role) = match &self.config.auth.public_key {
             Some(public_key) => {
-                let presented_access = req
+                let claims = req
                     .header(AUTHORIZATION)
-                    .and_then(|header| extract_access_from_header(header, public_key));
-                match presented_access {
-                    Some(access) => access,
+                    .and_then(|header| extract_claims_from_header(header, public_key));
+                match claims {
+                    Some(claims) => {
+                        debug!(role = ?claims.role, "JWT validated successfully");
+                        (claims.access, claims.role)
+                    }
                     None => {
-                        if self.config.enabled_for_reads {
+                        if self.config.auth.require_auth_for_reads {
+                            warn!("Request missing valid JWT — rejecting (require_auth_for_reads=true)");
                             return Err(Unauthorized(AuthError::RequireRead));
                         } else {
-                            Access::Ro // if read access is not required, we give read access to all requests
+                            debug!("No valid JWT but require_auth_for_reads=false — granting read access");
+                            (Access::Ro, None)
                         }
                     }
                 }
             }
-            None => Access::Rw, // if auth is not setup, we give write access to all requests
+            None => (Access::Rw, None), // if auth is not setup, we give write access to all requests
         };

         let is_accept_multipart_mixed = req
@@ -151,7 +156,7 @@ where
         if is_accept_multipart_mixed {
             let (req, mut body) = req.split();
             let req = GraphQLRequest::from_request(&req, &mut body).await?;
-            let req = req.0.data(access);
+            let req = req.0.data(access).data(role);
             let stream = self.executor.execute_stream(req, None);
             Ok(Response::builder()
                 .header("content-type", "multipart/mixed; boundary=graphql")
                 )))
         } else {
             let (req, mut body) = req.split();
-            let req = GraphQLBatchRequest::from_request(&req, &mut body).await?;
-            let req = req.0.data(access);
+            let batch_req = GraphQLBatchRequest::from_request(&req, &mut body).await?.0;
+
+            if let BatchRequest::Batch(requests) = &batch_req {
+                if self.config.concurrency.disable_batching {
+                    return Err(BadRequest(AuthError::BatchingDisabled));
+                }
+                if let Some(max) = self.config.concurrency.max_batch_size {
+                    let actual = requests.len();
+                    if actual > max {
+                        return Err(BadRequest(AuthError::BatchSizeExceeded { max, actual }));
+                    }
+                }
+            }
+
+            let req = batch_req.data(access).data(role);

             let contains_update = match &req {
                 BatchRequest::Single(request) => request.query.contains("updateGraph"),
@@ -200,28 +218,50 @@ fn is_query_heavy(query: &str) -> bool {
         || query.contains("inNeighbours")
 }

-fn extract_access_from_header(header: &str, public_key: &PublicKey) -> Option<Access> {
+fn extract_claims_from_header(header: &str, public_key: &PublicKey) -> Option<TokenClaims> {
     if header.starts_with("Bearer ") {
         let jwt = header.replace("Bearer ", "");
-        let mut validation = Validation::new(Algorithm::EdDSA);
+        let mut validation = Validation::new(public_key.algorithms[0]);
+        validation.algorithms = public_key.algorithms.clone();
         validation.set_required_spec_claims::<&str>(&[]); // we don't require 'exp' to be present
         let decoded = decode::<TokenClaims>(&jwt, &public_key.decoding_key, &validation);
-        Some(decoded.ok()?.claims.a)
+        match decoded {
+            Ok(token_data) => Some(token_data.claims),
+            Err(e) => {
+                warn!(error = %e, "JWT signature validation failed");
+                None
+            }
+        }
     } else {
+        warn!("Authorization header is missing or does not start with 'Bearer '");
         None
     }
 }

 pub(crate) trait ContextValidation {
-    fn require_write_access(&self) -> Result<(), AuthError>;
+    fn require_jwt_write_access(&self) -> Result<(), AuthError>;
+}
+
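Condensed to its essentials, the token path above validates a `Bearer` JWT against the configured public key and pulls out the two claims. A self-contained sketch, assuming an Ed25519 (EdDSA) key and a plain-`String` role claim (the diff elides the role's concrete type):

```rust
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use serde::Deserialize;

#[derive(Deserialize)]
struct Claims {
    access: String, // "ro" or "rw" under the serde lowercase rename
    #[serde(default)]
    role: Option<String>, // assumed String; the diff elides the type
}

// Mirrors extract_claims_from_header for a single-algorithm (EdDSA) key.
fn validate(jwt: &str, ed25519_der: &[u8]) -> Option<Claims> {
    let mut validation = Validation::new(Algorithm::EdDSA);
    validation.set_required_spec_claims::<&str>(&[]); // keep 'exp' optional
    decode::<Claims>(jwt, &DecodingKey::from_ed_der(ed25519_der), &validation)
        .map(|data| data.claims)
        .ok()
}
```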
+/// Check that the request carries a write-access JWT (`"access": "rw"`).
+/// For use in dynamic resolver ops that run under `query { ... }` and are
+/// therefore not covered by the `MutationAuth` extension.
+pub fn require_jwt_write_access_dynamic(
+    ctx: &async_graphql::dynamic::ResolverContext,
+) -> Result<(), async_graphql::Error> {
+    if ctx.data::<Access>().is_ok_and(|a| a == &Access::Rw) {
+        Ok(())
+    } else {
+        Err(async_graphql::Error::new(
+            "Access denied: write access required",
+        ))
+    }
 }

 impl<'a> ContextValidation for &Context<'a> {
-    fn require_write_access(&self) -> Result<(), AuthError> {
-        if self.data::<Access>().is_ok_and(|role| role == &Access::Rw) {
-            Ok(())
-        } else {
-            Err(AuthError::RequireWrite)
+    fn require_jwt_write_access(&self) -> Result<(), AuthError> {
+        match self.data::<Access>() {
+            Ok(access) if access == &Access::Rw => Ok(()),
+            _ => Err(AuthError::RequireWrite),
         }
     }
 }
@@ -249,10 +289,18 @@ impl Extension for MutationAuth {
             .iter()
             .any(|op| op.1.node.ty == OperationType::Mutation);
         if mutation && ctx.data::<Access>() != Ok(&Access::Rw) {
-            Err(AuthError::RequireWrite.into())
-        } else {
-            Ok(doc)
+            // If a policy is active, allow "ro" users through to resolvers —
+            // each resolver enforces its own per-graph or admin-only check.
+            // Without a policy (OSS), preserve the original blanket deny.
+            let policy_active = ctx
+                .data::()
+                .map(|d| d.auth_policy.is_some())
+                .unwrap_or(false);
+            if !policy_active {
+                return Err(AuthError::RequireWrite.into());
+            }
         }
+        Ok(doc)
     })
 }
}

diff --git a/raphtory-graphql/src/auth_policy.rs b/raphtory-graphql/src/auth_policy.rs
new file mode 100644
index 0000000000..30696c3cc6
--- /dev/null
+++ b/raphtory-graphql/src/auth_policy.rs
@@ -0,0 +1,128 @@
+use crate::model::graph::filtering::GraphAccessFilter;
+
+/// Opaque error returned by [`AuthorizationPolicy::graph_permissions`] when access is entirely
+/// denied. The message is intended for logging only; callers must not surface it to end users.
+#[derive(Debug)]
+pub struct AuthPolicyError(String);
+
+impl AuthPolicyError {
+    pub fn new(msg: impl Into<String>) -> Self {
+        Self(msg.into())
+    }
+}
+
+impl std::fmt::Display for AuthPolicyError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+// async_graphql's blanket `impl<T: Display> From<T> for Error` covers
+// AuthPolicyError automatically via its Display impl.
+
+/// The effective permission level a principal has on a specific graph.
+/// Variants are ordered by the hierarchy: `Write` > `Read{filter:None}` > `Read{filter:Some}` > `Introspect`.
+/// A filtered `Read` is less powerful than an unfiltered `Read` because it sees a restricted view.
+#[derive(Clone)]
+pub enum GraphPermission {
+    /// May query graph metadata (counts, schema) but not read data.
+    Introspect,
+    /// May read graph data; optionally restricted by a data filter.
+    Read { filter: Option<GraphAccessFilter> },
+    /// May read and mutate the graph (implies `Read` and `Introspect`, never filtered).
+    Write,
+}
+
+impl GraphPermission {
+    /// Numeric level used for ordering: `Introspect`=0, `Read{Some}`=1, `Read{None}`=2, `Write`=3.
+    fn level(&self) -> u8 {
+        match self {
+            GraphPermission::Introspect => 0,
+            GraphPermission::Read { filter: Some(_) } => 1,
+            GraphPermission::Read { filter: None } => 2,
+            GraphPermission::Write => 3,
+        }
+    }
+
+    /// Returns `true` if the permission level is `Read` or higher.
+    pub fn is_at_least_read(&self) -> bool {
+        self.level() >= 1
+    }
+
+    /// Returns `true` only for `Write` permission.
+    pub fn is_write(&self) -> bool {
+        self.level() >= 3
+    }
+
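The hand-written `level()` ranking (and the `PartialEq`/`Ord` impls that follow) exists because `Read` carries a payload, so a derived ordering would compare filters instead of privilege. A minimal self-contained illustration of the same trick:

```rust
use std::cmp::Ordering;

// Stand-in for GraphPermission: a payload-carrying variant rules out
// derive(Ord), so every comparison funnels through one numeric rank.
enum Perm {
    Introspect,
    Read { filtered: bool }, // simplified stand-in for Option<filter>
    Write,
}

impl Perm {
    fn level(&self) -> u8 {
        match self {
            Perm::Introspect => 0,
            Perm::Read { filtered: true } => 1, // filtered read ranks lower
            Perm::Read { filtered: false } => 2,
            Perm::Write => 3,
        }
    }
}

fn main() {
    let filtered = Perm::Read { filtered: true };
    let unfiltered = Perm::Read { filtered: false };
    assert_eq!(filtered.level().cmp(&unfiltered.level()), Ordering::Less);
}
```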
+    /// Returns `Some(self)` if at least `Read` (filtered or not), `None` otherwise.
+    /// Use with `?` to gate access and preserve the permission value for filter extraction.
+    pub fn at_least_read(self) -> Option<Self> {
+        self.is_at_least_read().then_some(self)
+    }
+
+    /// Returns `Some(self)` if `Write`, `None` otherwise.
+    pub fn at_least_write(self) -> Option<Self> {
+        self.is_write().then_some(self)
+    }
+}
+
+impl PartialEq for GraphPermission {
+    fn eq(&self, other: &Self) -> bool {
+        self.level() == other.level()
+    }
+}
+
+impl Eq for GraphPermission {}
+
+impl PartialOrd for GraphPermission {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for GraphPermission {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.level().cmp(&other.level())
+    }
+}
+
+/// The effective permission level a principal has on a namespace.
+/// Variants are ordered lowest to highest so that `PartialOrd`/`Ord` reflect the hierarchy.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum NamespacePermission {
+    /// No access — namespace is invisible.
+    Denied,
+    /// Namespace is visible in parent `children()` listings but cannot be browsed.
+    Discover,
+    /// Namespace is browseable; graphs inside are visible as MetaGraph in `graphs()`.
+    Introspect,
+    /// All descendant graphs are fully readable.
+    Read,
+    /// All descendants are writable; `newGraph` is allowed.
+    Write,
+}
+
+pub trait AuthorizationPolicy: Send + Sync + 'static {
+    /// Resolves the effective permission level for a principal on a graph.
+    /// Returns `Err(denial message)` only when access is entirely denied (not even introspect).
+    /// Admin principals (`"access": "rw"` JWT) always yield `Write`.
+    /// Empty store (no roles configured) yields `Read` — fail open for reads,
+    /// but write still requires an explicit `Write` grant.
+    /// The implementation is responsible for extracting principal identity from `ctx`.
+    fn graph_permissions(
+        &self,
+        ctx: &async_graphql::Context<'_>,
+        path: &str,
+    ) -> Result<GraphPermission, AuthPolicyError>;
+
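The `at_least_*` helpers are built for `?`-style gating inside resolvers. A hypothetical caller (the function and its return contract are illustrative, and the import path assumes `auth_policy` is publicly reachable):

```rust
use raphtory_graphql::auth_policy::GraphPermission;

// None = deny outright; Some(()) = proceed, applying the filter if present.
fn read_gate(perm: GraphPermission) -> Option<()> {
    let perm = perm.at_least_read()?; // Introspect bails out here
    if let GraphPermission::Read { filter: Some(_filter) } = &perm {
        // a filtered read would apply _filter to the returned view
    }
    Some(())
}
```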
+    /// Resolves the effective namespace permission for a principal.
+    /// Admin principals always yield `Write`.
+    /// Empty store yields `Read` (fail open, consistent with graph_permissions).
+    /// Missing role yields `Denied`.
+    /// The implementation is responsible for extracting principal identity from `ctx`.
+    fn namespace_permissions(
+        &self,
+        ctx: &async_graphql::Context<'_>,
+        path: &str,
+    ) -> NamespacePermission;
+}

diff --git a/raphtory-graphql/src/cli.rs b/raphtory-graphql/src/cli.rs
index a51cb906e9..56a7bbe242 100644
--- a/raphtory-graphql/src/cli.rs
+++ b/raphtory-graphql/src/cli.rs
@@ -3,19 +3,24 @@ use crate::config::index_config::DEFAULT_CREATE_INDEX;
 use crate::{
     config::{
         app_config::AppConfigBuilder,
-        auth_config::{DEFAULT_AUTH_ENABLED_FOR_READS, PUBLIC_KEY_DECODING_ERR_MSG},
+        auth_config::{DEFAULT_REQUIRE_AUTH_FOR_READS, PUBLIC_KEY_DECODING_ERR_MSG},
         cache_config::{DEFAULT_CAPACITY, DEFAULT_TTI_SECONDS},
+        concurrency_config::{
+            DEFAULT_DISABLE_BATCHING, DEFAULT_DISABLE_LISTS, DEFAULT_EXCLUSIVE_WRITES,
+        },
         log_config::DEFAULT_LOG_LEVEL,
         otlp_config::{
             TracingLevel, DEFAULT_OTLP_AGENT_HOST, DEFAULT_OTLP_AGENT_PORT,
             DEFAULT_OTLP_TRACING_SERVICE_NAME, DEFAULT_TRACING_ENABLED, DEFAULT_TRACING_LEVEL,
         },
+        schema_config::DEFAULT_DISABLE_INTROSPECTION,
     },
     model::App,
-    server::DEFAULT_PORT,
+    server::{apply_server_extension, DEFAULT_PORT},
     GraphServer,
 };
 use clap::{Parser, Subcommand};
+use raphtory::db::api::storage::storage::Config;
 use std::path::PathBuf;
 use tokio::io::Result as IoResult;
@@ -74,15 +79,109 @@ struct ServerArgs {
     #[arg(long, env = "RAPHTORY_AUTH_PUBLIC_KEY", default_value = None, help = "Public key for auth")]
     auth_public_key: Option<String>,

-    #[arg(long, env = "RAPHTORY_AUTH_ENABLED_FOR_READS", default_value_t = DEFAULT_AUTH_ENABLED_FOR_READS, help = "Enable auth for reads")]
-    auth_enabled_for_reads: bool,
+    #[arg(long, env = "RAPHTORY_REQUIRE_AUTH_FOR_READS", default_value_t = DEFAULT_REQUIRE_AUTH_FOR_READS, help = "Require JWT authentication for read requests (default: true)")]
+    require_auth_for_reads: bool,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_HEAVY_QUERY_LIMIT",
+        default_value = None,
+        help = "Restricts how many expensive graph traversal queries can execute simultaneously. Covers operations like connected components, edge traversals, and neighbour lookups (outComponent, inComponent, edges, outEdges, inEdges, neighbours, outNeighbours, inNeighbours). Once the limit is exceeded, queries are parked on a semaphore and wait until a slot becomes available before executing."
+    )]
+    heavy_query_limit: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_EXCLUSIVE_WRITES",
+        default_value_t = DEFAULT_EXCLUSIVE_WRITES,
+        help = "Ensures only one ingestion/write operation runs at a time and blocks reads until it completes."
+    )]
+    exclusive_writes: bool,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_DISABLE_BATCHING",
+        default_value_t = DEFAULT_DISABLE_BATCHING,
+        help = "Rejects batched GraphQL requests outright. Batching can otherwise be used to circumvent per-request depth and complexity limits."
+    )]
+    disable_batching: bool,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_BATCH_SIZE",
+        default_value = None,
+        help = "Caps the number of queries accepted in a single batched HTTP request. Requests whose batch exceeds this size are rejected."
+    )]
+    max_batch_size: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_DISABLE_LISTS",
+        default_value_t = DEFAULT_DISABLE_LISTS,
+        help = "Completely disables bulk list endpoints (e.g. listing all nodes/edges). Essential for large graphs where unbounded list queries could return billions of results and exhaust server resources."
+    )]
+    disable_lists: bool,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_PAGE_SIZE",
+        default_value = None,
+        help = "Maximum page size enforced on paged collection queries. Caps the `limit` argument of `page` so clients can't circumvent `disable_lists` by requesting huge pages."
+    )]
+    max_page_size: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_QUERY_DEPTH",
+        default_value = None,
+        help = "Limits how deeply nested a query can be."
+    )]
+    max_query_depth: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_QUERY_COMPLEXITY",
+        default_value = None,
+        help = "Limits the total estimated cost of a query based on the number of fields selected. Blocks queries that try to fetch too much data in one request."
+    )]
+    max_query_complexity: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_RECURSIVE_DEPTH",
+        default_value = None,
+        help = "Internal safety limit to prevent stack overflows from pathologically structured queries. Falls back to the async-graphql default of 32 if unset."
+    )]
+    max_recursive_depth: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_MAX_DIRECTIVES_PER_FIELD",
+        default_value = None,
+        help = "Limits the number of GraphQL directives on any single field. Directives are annotations prefixed with @ that modify how a field is executed (e.g. @skip, @include, @deprecated)."
+    )]
+    max_directives_per_field: Option<usize>,
+
+    #[arg(
+        long,
+        env = "RAPHTORY_DISABLE_INTROSPECTION",
+        default_value_t = DEFAULT_DISABLE_INTROSPECTION,
+        help = "Fully disable schema introspection, preventing clients from discovering the API's structure and available fields. Recommended for production."
+    )]
+    disable_introspection: bool,

     #[arg(long, env = "RAPHTORY_PUBLIC_DIR", default_value = None, help = "Public directory path")]
     public_dir: Option<String>,

+    #[arg(long, env = "RAPHTORY_PERMISSIONS_STORE_PATH", default_value = None, help = "Path to the JSON permissions store file")]
+    permissions_store_path: Option<PathBuf>,
+
     #[cfg(feature = "search")]
     #[arg(long, env = "RAPHTORY_CREATE_INDEX", default_value_t = DEFAULT_CREATE_INDEX, help = "Enable index creation")]
     create_index: bool,
+
+    #[command(flatten)]
+    graph_config: Config,
 }

 pub(crate) async fn cli_with_args<I>(args_iter: I) -> IoResult<()>
 where
@@ -110,7 +209,18 @@
         .with_auth_public_key(server_args.auth_public_key)
         .expect(PUBLIC_KEY_DECODING_ERR_MSG)
         .with_public_dir(server_args.public_dir)
-        .with_auth_enabled_for_reads(server_args.auth_enabled_for_reads);
+        .with_require_auth_for_reads(server_args.require_auth_for_reads)
+        .with_heavy_query_limit(server_args.heavy_query_limit)
+        .with_exclusive_writes(server_args.exclusive_writes)
+        .with_disable_batching(server_args.disable_batching)
+        .with_max_batch_size(server_args.max_batch_size)
+        .with_disable_lists(server_args.disable_lists)
+        .with_max_page_size(server_args.max_page_size)
+        .with_max_query_depth(server_args.max_query_depth)
+        .with_max_query_complexity(server_args.max_query_complexity)
+        .with_max_recursive_depth(server_args.max_recursive_depth)
+        .with_max_directives_per_field(server_args.max_directives_per_field)
+        .with_disable_introspection(server_args.disable_introspection);

     #[cfg(feature = "search")]
     {
@@ -119,10 +229,16 @@

     let app_config = Some(builder.build());

-    GraphServer::new(server_args.work_dir, app_config, None)
-        .await?
- .run_with_port(server_args.port) - .await?; + let server = GraphServer::new( + server_args.work_dir, + app_config, + None, + server_args.graph_config, + ) + .await?; + let server = + apply_server_extension(server, server_args.permissions_store_path.as_deref()); + server.run_with_port(server_args.port).await?; } } Ok(()) @@ -132,6 +248,10 @@ pub async fn cli() -> IoResult<()> { cli_with_args(std::env::args_os()).await } +/// Run the Raphtory GraphQL CLI from Python. Uses `sys.argv` for arguments. +/// +/// Returns: +/// None: #[cfg(feature = "python")] #[pyo3::pyfunction(name = "cli")] pub fn python_cli() -> pyo3::PyResult<()> { diff --git a/raphtory-graphql/src/client/error.rs b/raphtory-graphql/src/client/error.rs new file mode 100644 index 0000000000..e58f38a9c3 --- /dev/null +++ b/raphtory-graphql/src/client/error.rs @@ -0,0 +1,30 @@ +//! Error type for the GraphQL client. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ClientError { + #[error("Network/request error: {0}")] + Request(#[from] reqwest::Error), + + #[error("{0}")] + HttpError(String), + + #[error("GraphQL errors: {0}")] + GraphQLErrors(String), + + #[error("Invalid response: {0}")] + InvalidResponse(String), + + #[error("JSON parse error: {0}")] + Json(#[from] serde_json::Error), + + #[error("Graph encode/decode error: {0}")] + Graph(#[from] raphtory::errors::GraphError), + + #[error("An error when parsing Jinja query templates: {0}")] + JinjaError(String), + + #[error("The request did not succeed.")] + UnsuccessfulResponse, +} diff --git a/raphtory-graphql/src/client/mod.rs b/raphtory-graphql/src/client/mod.rs new file mode 100644 index 0000000000..8eaf1cf6f8 --- /dev/null +++ b/raphtory-graphql/src/client/mod.rs @@ -0,0 +1,166 @@ +//! Pure Rust GraphQL client for Raphtory GraphQL server. + +mod error; +pub mod raphtory_client; +pub mod remote_edge; +pub mod remote_graph; +pub mod remote_node; + +pub use error::ClientError; +pub use remote_edge::GraphQLRemoteEdge; +pub use remote_graph::GraphQLRemoteGraph; +pub use remote_node::GraphQLRemoteNode; + +use raphtory_api::core::entities::properties::prop::Prop; +use std::collections::HashMap; + +/// Check if a server at the given URL is online (responds with 200). 
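Because `ClientError` separates transport failures from server-reported GraphQL errors, callers can branch without string matching. A consumer sketch; the query text is a placeholder, and the import path assumes the `client` module is reachable as its `pub mod`/`pub use` lines suggest:

```rust
use raphtory_graphql::client::{raphtory_client::RaphtoryGraphQLClient, ClientError};
use std::collections::HashMap;

async fn describe_failure(client: &RaphtoryGraphQLClient) {
    // "{ hello }" is a placeholder document, not a field from this schema.
    match client.query("{ hello }", HashMap::new()).await {
        Ok(data) => println!("data: {data:?}"),
        Err(ClientError::GraphQLErrors(msg)) => eprintln!("server rejected: {msg}"),
        Err(ClientError::Request(e)) => eprintln!("transport failure: {e}"),
        Err(other) => eprintln!("other failure: {other}"),
    }
}
```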
+pub fn is_online(url: &str) -> bool { + reqwest::blocking::Client::new() + .get(url) + .send() + .map(|response| response.status().as_u16() == 200) + .unwrap_or(false) +} + +pub(crate) fn inner_collection(value: &Prop) -> String { + match value { + Prop::Str(value) => format!("{{ str: {} }}", serde_json::to_string(value).unwrap()), + Prop::U8(value) => format!("{{ u8: {} }}", value), + Prop::U16(value) => format!("{{ u16: {} }}", value), + Prop::I32(value) => format!("{{ i32: {} }}", value), + Prop::I64(value) => format!("{{ i64: {} }}", value), + Prop::U32(value) => format!("{{ u32: {} }}", value), + Prop::U64(value) => format!("{{ u64: {} }}", value), + Prop::F32(value) => format!("{{ f32: {} }}", value), + Prop::F64(value) => format!("{{ f64: {} }}", value), + Prop::Bool(value) => format!("{{ bool: {} }}", value), + Prop::List(value) => { + let vec: Vec = value.iter().map(|p| inner_collection(&p)).collect(); + format!("{{ list: [{}] }}", vec.join(", ")) + } + Prop::Map(value) => { + let properties_array: Vec = value + .iter() + .map(|(k, v)| { + format!( + "{{ key: {}, value: {} }}", + serde_json::to_string(k).unwrap(), + inner_collection(v) + ) + }) + .collect(); + format!("{{ object: [{}] }}", properties_array.join(", ")) + } + Prop::DTime(dt) => format!("{{ dtime: \"{}\" }}", dt.to_rfc3339()), + Prop::NDTime(ndt) => format!( + "{{ ndtime: \"{}\" }}", + ndt.format("%Y-%m-%dT%H:%M:%S%.3f").to_string() + ), + Prop::Decimal(value) => format!("{{ decimal: \"{}\" }}", value.to_string()), + } +} + +fn to_graphql_valid(key: &String, value: &Prop) -> String { + match value { + Prop::Str(value) => format!( + "{{ key: {}, value: {{ str: {} }} }}", + serde_json::to_string(key).unwrap(), + serde_json::to_string(value).unwrap() + ), + Prop::U8(value) => format!( + "{{ key: {}, value: {{ u8: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::U16(value) => format!( + "{{ key: {}, value: {{ u16: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::I32(value) => format!( + "{{ key: {}, value: {{ i32: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::I64(value) => format!( + "{{ key: {}, value: {{ i64: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::U32(value) => format!( + "{{ key: {}, value: {{ u32: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::U64(value) => format!( + "{{ key: {}, value: {{ u64: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::F32(value) => format!( + "{{ key: {}, value: {{ f32: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::F64(value) => format!( + "{{ key: {}, value: {{ f64: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::Bool(value) => format!( + "{{ key: {}, value: {{ bool: {} }} }}", + serde_json::to_string(key).unwrap(), + value + ), + Prop::List(value) => { + let vec: Vec = value.iter().map(|p| inner_collection(&p)).collect(); + format!( + "{{ key: {}, value: {{ list: [{}] }} }}", + serde_json::to_string(key).unwrap(), + vec.join(", ") + ) + } + Prop::Map(value) => { + let properties_array: Vec = value + .iter() + .map(|(k, v)| { + format!( + "{{ key: {}, value: {} }}", + serde_json::to_string(k).unwrap(), + inner_collection(v) + ) + }) + .collect(); + format!( + "{{ key: {}, value: {{ object: [{}] }} }}", + serde_json::to_string(key).unwrap(), + properties_array.join(", ") + ) + } + Prop::DTime(dt) => format!( + "{{ key: {}, value: {{ dtime: \"{}\" }} }}", + serde_json::to_string(key).unwrap(), + 
dt.to_rfc3339() + ), + Prop::NDTime(ndt) => format!( + "{{ key: {}, value: {{ ndtime: \"{}\" }} }}", + serde_json::to_string(key).unwrap(), + ndt.format("%Y-%m-%dT%H:%M:%S%.3f").to_string() + ), + Prop::Decimal(value) => format!( + "{{ key: {}, value: {{ decimal: \"{}\" }} }}", + serde_json::to_string(key).unwrap(), + value.to_string() + ), + } +} + +pub(crate) fn build_property_string(properties: HashMap) -> String { + let properties_array: Vec = properties + .iter() + .map(|(k, v)| to_graphql_valid(k, v)) + .collect(); + + format!("[{}]", properties_array.join(", ")) +} diff --git a/raphtory-graphql/src/client/raphtory_client.rs b/raphtory-graphql/src/client/raphtory_client.rs new file mode 100644 index 0000000000..c8af4c36ae --- /dev/null +++ b/raphtory-graphql/src/client/raphtory_client.rs @@ -0,0 +1,377 @@ +use crate::{ + client::{ClientError, GraphQLRemoteGraph}, + url_encode::url_decode_graph, +}; +use raphtory::{db::api::view::MaterializedGraph, prelude::Config, serialise::GraphFolder}; +use reqwest::{multipart, multipart::Part, Client}; +use serde_json::{json, Value as JsonValue}; +use std::{collections::HashMap, io::Cursor}; +use url::Url; + +/// Pure Rust client for Raphtory GraphQL operations. +#[derive(Clone, Debug)] +pub struct RaphtoryGraphQLClient { + pub(crate) url: Url, + pub(crate) token: String, + client: Client, +} + +impl RaphtoryGraphQLClient { + /// Create a new client. Does not perform a connectivity check; use [`client::is_online`] first if needed. + pub fn new(url: Url, token: Option) -> Self { + Self { + url, + token: token.unwrap_or_default(), + client: Client::new(), + } + } + + /// Create a new client and verify the server is reachable (GET url, expect 200). + /// Returns an error if the server is not reachable. + pub async fn connect(url: Url, token: Option) -> Result { + let token = token.unwrap_or_default(); + let client = Client::new(); + + let response = client + .get(url.clone()) + .bearer_auth(&token) + .send() + .await + .map_err(|e| { + ClientError::HttpError(format!( + "Could not connect to the given server - no response --{e}" + )) + })?; + if response.status() != 200 { + let text = response.text().await.unwrap_or_default(); + return Err(ClientError::HttpError(format!( + "Could not connect to the given server - response {}", + text + ))); + } + + Ok(Self { url, token, client }) + } + + /// Returns true if the server could be reached and returns a healthy response. + pub async fn is_healthy(&self) -> bool { + let health_url = self.url.join("health").expect("couldn't create health url"); + + let response_res = self + .client + .get(health_url) + .bearer_auth(&self.token) + .send() + .await; + + if let Ok(response) = response_res { + if response.status().is_success() { + if let Ok(v) = response.json::().await { + if v.get("healthy") == Some(&JsonValue::Bool(true)) { + return true; + } + } + } + } + + false + } + + /// Execute a GraphQL query asynchronously. + /// Returns the `data` object as a map; errors if the response contains GraphQL `errors`. 
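End to end, `connect`, `is_healthy`, and `query` compose as below; the URL and port are placeholders, and the import path is assumed as above. `{ __typename }` is used because it is valid against any GraphQL server:

```rust
use raphtory_graphql::client::raphtory_client::RaphtoryGraphQLClient;
use std::collections::HashMap;
use url::Url;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // connect() fails fast if the server is unreachable, unlike new().
    let url = Url::parse("http://localhost:1736")?;
    let client = RaphtoryGraphQLClient::connect(url, None).await?;
    assert!(client.is_healthy().await);

    let data = client.query("{ __typename }", HashMap::new()).await?;
    println!("{data:?}");
    Ok(())
}
```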
+ pub async fn query( + &self, + query: &str, + variables: HashMap, + ) -> Result, ClientError> { + let request_body = json!({ + "query": query, + "variables": variables + }); + + let response = self + .client + .post(self.url.clone()) + .bearer_auth(&self.token) + .json(&request_body) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status().as_u16(); + let text = response.text().await.unwrap_or_default(); + return Err(ClientError::HttpError(format!( + "HTTP error: status {status}, body: {text}" + ))); + } + + let mut graphql_result: HashMap = response.json().await?; + + if let Some(errors) = graphql_result.remove("errors") { + let message = match errors { + JsonValue::Array(errors) => errors + .iter() + .map(|e| format!("{}", e)) + .collect::>() + .join("\n\t"), + _ => format!("{}", errors), + }; + return Err(ClientError::GraphQLErrors(format!( + "After sending query to the server:\n\t{}\nGot the following errors:\n\t{}", + query, message + ))); + } + + match graphql_result.remove("data") { + Some(JsonValue::Object(data)) => Ok(data.into_iter().collect()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}" + ))), + } + } + + /// Send a graph (base64-encoded string) to the server. + pub async fn send_graph( + &self, + path: &str, + encoded_graph: &str, + overwrite: bool, + ) -> Result<(), ClientError> { + let query = r#" + mutation SendGraph($path: String!, $graph: String!, $overwrite: Boolean!) { + sendGraph(path: $path, graph: $graph, overwrite: $overwrite) + } + "# + .to_owned(); + let variables: HashMap = [ + ("path".to_owned(), json!(path)), + ("graph".to_owned(), json!(encoded_graph)), + ("overwrite".to_owned(), json!(overwrite)), + ] + .into_iter() + .collect(); + + let data = self.query(&query, variables).await?; + match data.get("sendGraph") { + Some(JsonValue::String(_)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Error Sending Graph. Got response {:?}", + data + ))), + } + } + + /// Upload a graph from a local file path (zip) via multipart. + pub async fn upload_graph( + &self, + path: &str, + file_path: &str, + overwrite: bool, + ) -> Result<(), ClientError> { + let folder = GraphFolder::from(file_path); + let mut buffer = Vec::new(); + folder.zip_from_folder(Cursor::new(&mut buffer))?; + + let variables = format!( + r#""path": "{}", "overwrite": {}, "graph": null"#, + path, overwrite + ); + let operations = format!( + r#"{{ + "query": "mutation UploadGraph($path: String!, $graph: Upload!, $overwrite: Boolean!) {{ uploadGraph(path: $path, graph: $graph, overwrite: $overwrite) }}", + "variables": {{ {} }} + }}"#, + variables + ); + + let form = multipart::Form::new() + .text("operations", operations) + .text("map", r#"{"0": ["variables.graph"]}"#) + .part("0", Part::bytes(buffer).file_name(file_path.to_string())); + + let response = self + .client + .post(self.url.clone()) + .bearer_auth(&self.token) + .multipart(form) + .send() + .await?; + + let status = response.status(); + let text = response.text().await?; + + if !status.is_success() { + return Err(ClientError::HttpError(format!( + "Error Uploading Graph. Status: {}. Response: {}", + status.as_u16(), + text + ))); + } + + let mut data: HashMap = serde_json::from_str(&text)?; + match data.remove("data") { + Some(JsonValue::Object(_)) => Ok(()), + _ => match data.remove("errors") { + Some(JsonValue::Array(errors)) => Err(ClientError::GraphQLErrors(format!( + "Error Uploading Graph. 
Got errors:\n\t{:#?}", + errors + ))), + _ => Err(ClientError::InvalidResponse(format!( + "Error Uploading Graph. Unexpected response: {}", + text + ))), + }, + } + } + + /// Copy graph on the server. + pub async fn copy_graph(&self, path: &str, new_path: &str) -> Result<(), ClientError> { + let query = r#" + mutation CopyGraph($path: String!, $newPath: String!) { + copyGraph(path: $path, newPath: $newPath) + }"# + .to_owned(); + let variables: HashMap = [ + ("path".to_owned(), json!(path)), + ("newPath".to_owned(), json!(new_path)), + ] + .into_iter() + .collect(); + + let data = self.query(&query, variables).await?; + match data.get("copyGraph") { + Some(JsonValue::Bool(true)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}\nGot data:\n\t'{data:?}'" + ))), + } + } + + /// Move graph on the server. + pub async fn move_graph(&self, path: &str, new_path: &str) -> Result<(), ClientError> { + let query = r#" + mutation MoveGraph($path: String!, $newPath: String!) { + moveGraph(path: $path, newPath: $newPath) + }"# + .to_owned(); + let variables: HashMap = [ + ("path".to_owned(), json!(path)), + ("newPath".to_owned(), json!(new_path)), + ] + .into_iter() + .collect(); + + let data = self.query(&query, variables).await?; + match data.get("moveGraph") { + Some(JsonValue::Bool(true)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}\nGot data:\n\t'{data:?}'" + ))), + } + } + + /// Delete graph on the server. + pub async fn delete_graph(&self, path: &str) -> Result<(), ClientError> { + let query = r#" + mutation DeleteGraph($path: String!) { + deleteGraph(path: $path) + }"# + .to_owned(); + let variables: HashMap = + [("path".to_owned(), json!(path))].into_iter().collect(); + + let data = self.query(&query, variables).await?; + match data.get("deleteGraph") { + Some(JsonValue::Bool(true)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}\nGot data:\n\t'{data:?}'" + ))), + } + } + + /// Receive graph from the server. Returns the base64-encoded graph string. + pub async fn receive_graph(&self, path: &str) -> Result { + let query = r#" + query ReceiveGraph($path: String!) { + receiveGraph(path: $path) + }"# + .to_owned(); + let variables: HashMap = + [("path".to_owned(), json!(path))].into_iter().collect(); + + let data = self.query(&query, variables).await?; + match data.get("receiveGraph") { + Some(JsonValue::String(s)) => Ok(s.clone()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}\nGot data:\n\t'{data:?}'" + ))), + } + } + + /// Receive graph from the server and decode to MaterializedGraph. + pub async fn receive_graph_decoded( + &self, + path: &str, + ) -> Result { + let encoded = self.receive_graph(path).await?; + url_decode_graph(encoded, Config::default()).map_err(ClientError::from) + } + + /// Create a new empty graph on the server. + pub async fn new_graph(&self, path: &str, graph_type: &str) -> Result<(), ClientError> { + let query = r#" + mutation NewGraph($path: String!) 
{ + newGraph(path: $path, graphType: EVENT) + }"# + .to_owned() + .replace("EVENT", graph_type); + + let variables: HashMap = + [("path".to_owned(), json!(path))].into_iter().collect(); + + let data = self.query(&query, variables).await?; + match data.get("newGraph") { + Some(JsonValue::Bool(true)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Error while reading server response for query:\n\t{query}\nGot data:\n\t'{data:?}'" + ))), + } + } + + pub fn remote_graph(&self, path: String) -> GraphQLRemoteGraph { + GraphQLRemoteGraph::new(path, self.clone()) + } + + /// Create index on the server. `index_spec` must serialize to a value + /// compatible with the GraphQL `IndexSpecInput` type. + pub async fn create_index( + &self, + path: &str, + index_spec: JsonValue, + in_ram: bool, + ) -> Result<(), ClientError> { + let query = r#" + mutation CreateIndex($path: String!, $indexSpec: IndexSpecInput!, $inRam: Boolean!) { + createIndex(path: $path, indexSpec: $indexSpec, inRam: $inRam) + } + "# + .to_owned(); + + let variables: HashMap = [ + ("path".to_string(), json!(path)), + ("indexSpec".to_string(), index_spec), + ("inRam".to_string(), json!(in_ram)), + ] + .into_iter() + .collect(); + + let data = self.query(&query, variables).await?; + match data.get("createIndex") { + Some(JsonValue::Bool(true)) => Ok(()), + _ => Err(ClientError::InvalidResponse(format!( + "Failed to create index, server returned: {:?}", + data + ))), + } + } +} diff --git a/raphtory-graphql/src/client/remote_edge.rs b/raphtory-graphql/src/client/remote_edge.rs new file mode 100644 index 0000000000..fcacc74e5e --- /dev/null +++ b/raphtory-graphql/src/client/remote_edge.rs @@ -0,0 +1,145 @@ +//! Pure Rust remote edge client for GraphQL updateGraph.edge(...) operations. + +use crate::client::{ + build_property_string, raphtory_client::RaphtoryGraphQLClient, remote_graph::build_query, + ClientError, +}; +use minijinja::context; +use raphtory_api::core::{ + entities::properties::prop::Prop, storage::timeindex::AsTime, utils::time::IntoTime, +}; +use std::collections::HashMap; + +/// Pure Rust remote edge wrapper around `RaphtoryGraphQLClient`. +#[derive(Clone)] +pub struct GraphQLRemoteEdge { + pub path: String, + pub client: RaphtoryGraphQLClient, + pub src: String, + pub dst: String, +} + +impl GraphQLRemoteEdge { + pub fn new(path: String, client: RaphtoryGraphQLClient, src: String, dst: String) -> Self { + Self { + path, + client, + src, + dst, + } + } + + /// Add temporal updates to the edge at the specified time. + pub async fn add_updates( + &self, + t: T, + properties: Option>, + layer: Option, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + edge(src: "{{src}}",dst: "{{dst}}") { + addUpdates(time: {{t}} {% if properties is not none %}, properties: {{ properties | safe }} {% endif %} {% if layer is not none %}, layer: "{{layer}}" {% endif %}) + } + } + } + "#; + + let ctx = context! { + path => self.path, + src => self.src, + dst => self.dst, + t => t.into_time().t(), + properties => properties.map(|p| build_property_string(p)), + layer => layer + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Mark the edge as deleted at the specified time. 
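The remote wrappers compose with the client as below, assuming a reachable server with a writable graph at path "g", and that plain `i64`/`&str` arguments satisfy the `IntoTime`/`Into<GID>` bounds:

```rust
use raphtory_graphql::client::raphtory_client::RaphtoryGraphQLClient;
use url::Url;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = RaphtoryGraphQLClient::new(Url::parse("http://localhost:1736")?, None);
    let graph = client.remote_graph("g".to_owned());

    // Each call renders one updateGraph mutation from the templates above.
    let edge = graph.add_edge(1i64, "a", "b", None, None).await?;
    edge.add_updates(2i64, None, None).await?;
    Ok(())
}
```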
+ pub async fn delete( + &self, + t: T, + layer: Option, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + edge(src: "{{src}}",dst: "{{dst}}") { + delete(time: {{t}}{% if layer is not none %}, layer: "{{layer}}"{% endif %}) + } + } + } + "#; + + let ctx = context! { + path => self.path, + src => self.src, + dst => self.dst, + t => t.into_time().t(), + layer => layer + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Add metadata to the edge (properties that do not change over time). + pub async fn add_metadata( + &self, + properties: HashMap, + layer: Option, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + edge(src: "{{src}}",dst: "{{dst}}") { + addMetadata(properties: {{ properties | safe }} {% if layer is not none %}, layer: "{{layer}}" {% endif %}) + } + } + } + "#; + + let ctx = context! { + path => self.path, + src => self.src, + dst => self.dst, + properties => build_property_string(properties), + layer => layer + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Update metadata of the edge, overwriting existing values. + pub async fn update_metadata( + &self, + properties: HashMap, + layer: Option, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + edge(src: "{{src}}",dst: "{{dst}}") { + updateMetadata(properties: {{ properties | safe }} {% if layer is not none %}, layer: "{{layer}}" {% endif %}) + } + } + } + "#; + + let ctx = context! { + path => self.path, + src => self.src, + dst => self.dst, + properties => build_property_string(properties), + layer => layer + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } +} diff --git a/raphtory-graphql/src/client/remote_graph.rs b/raphtory-graphql/src/client/remote_graph.rs new file mode 100644 index 0000000000..de8ab115bd --- /dev/null +++ b/raphtory-graphql/src/client/remote_graph.rs @@ -0,0 +1,336 @@ +use crate::client::{ + build_property_string, raphtory_client::RaphtoryGraphQLClient, remote_edge::GraphQLRemoteEdge, + remote_node::GraphQLRemoteNode, ClientError, +}; +use minijinja::{context, Environment, Value}; +use raphtory_api::core::{ + entities::{properties::prop::Prop, GID}, + storage::timeindex::{AsTime, EventTime}, + utils::time::IntoTime, +}; +use std::collections::HashMap; + +pub fn build_query(template: &str, context: Value) -> Result { + let mut env = Environment::new(); + env.add_template("template", template) + .map_err(|e| ClientError::JinjaError(e.to_string()))?; + let query = env + .get_template("template") + .map_err(|e| ClientError::JinjaError(e.to_string()))? + .render(context) + .map_err(|e| ClientError::JinjaError(e.to_string()))?; + Ok(query) +} + +/// Pure Rust remote graph wrapper around `RaphtoryGraphQLClient`. +#[derive(Clone)] +pub struct GraphQLRemoteGraph { + pub path: String, + pub client: RaphtoryGraphQLClient, +} + +impl GraphQLRemoteGraph { + pub fn new(path: String, client: RaphtoryGraphQLClient) -> Self { + Self { path, client } + } + + /// Returns a remote node reference for the given node id. 
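`build_query`, shown just below at the top of remote_graph.rs, is a thin minijinja wrapper; the templating idiom can be exercised standalone. Here it is inlined rather than importing the crate-private helper, with placeholder query text:

```rust
use minijinja::{context, Environment};

fn main() {
    // Same pattern as build_query: register one template, render with context!.
    let mut env = Environment::new();
    env.add_template("q", r#"{ graph(path: "{{ path }}") { name } }"#)
        .expect("static template parses");
    let query = env
        .get_template("q")
        .expect("registered above")
        .render(context! { path => "g" })
        .expect("render succeeds");
    assert!(query.contains(r#"path: "g""#));
}
```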
+ pub fn node(&self, id: impl ToString) -> GraphQLRemoteNode { + GraphQLRemoteNode::new(self.path.clone(), self.client.clone(), id.to_string()) + } + + /// Returns a remote edge reference for the given source and destination node ids. + pub fn edge(&self, src: impl ToString, dst: impl ToString) -> GraphQLRemoteEdge { + GraphQLRemoteEdge::new( + self.path.clone(), + self.client.clone(), + src.to_string(), + dst.to_string(), + ) + } + + pub async fn add_node + ToString, T: IntoTime>( + &self, + timestamp: T, + id: G, + properties: Option>, + node_type: Option, + ) -> Result { + let template = r#" + { + updateGraph(path: "{{ path }}") { + addNode(time: {{ time }}, name: "{{ name }}" {% if properties is not none %}, properties: {{ properties | safe }} {% endif %}{% if node_type is not none %}, nodeType: "{{ node_type }}"{% endif %}) { + success + } + } + } + "#; + + let ctx = context! { + path => self.path, + time => timestamp.into_time().t(), + name => id.to_string(), + properties => properties.map(|p| build_property_string(p)), + node_type => node_type, + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("addNode")) + .and_then(|x| x.as_object()) + .and_then(|x| x.get("success")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(GraphQLRemoteNode::new( + self.path.clone(), + self.client.clone(), + id.to_string(), + )) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + /// Create a new node (fails if the node already exists). Uses the createNode mutation. + pub async fn create_node + ToString, T: IntoTime>( + &self, + timestamp: T, + id: G, + properties: Option>, + node_type: Option, + ) -> Result { + let template = r#" + { + updateGraph(path: "{{ path }}") { + createNode(time: {{ time }}, name: "{{ name }}" {% if properties is not none %}, properties: {{ properties | safe }} {% endif %}{% if node_type is not none %}, nodeType: "{{ node_type }}"{% endif %}) { + success + } + } + } + "#; + + let ctx = context! { + path => self.path, + time => timestamp.into_time().t(), + name => id.to_string(), + properties => properties.map(|p| build_property_string(p)), + node_type => node_type, + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("createNode")) + .and_then(|x| x.as_object()) + .and_then(|x| x.get("success")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(GraphQLRemoteNode::new( + self.path.clone(), + self.client.clone(), + id.to_string(), + )) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + pub async fn add_edge + ToString, T: IntoTime>( + &self, + timestamp: T, + src: G, + dst: G, + properties: Option>, + layer: Option, + ) -> Result { + let template = r#" + { + updateGraph(path: "{{ path }}") { + addEdge(time: {{ time }}, src: "{{ src }}", dst: "{{ dst }}" {% if properties is not none %}, properties: {{ properties | safe }} {% endif %}{% if layer is not none %}, layer: "{{ layer }}"{% endif %}) { + success + } + } + } + "#; + + let ctx = context! 
{ + path => self.path, + time => timestamp.into_time().t(), + src => src.to_string(), + dst => dst.to_string(), + properties => properties.map(|p| build_property_string(p)), + layer => layer, + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("addEdge")) + .and_then(|x| x.as_object()) + .and_then(|x| x.get("success")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(GraphQLRemoteEdge::new( + self.path.clone(), + self.client.clone(), + src.to_string(), + dst.to_string(), + )) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + pub async fn add_property( + &self, + timestamp: EventTime, + properties: HashMap, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{ path }}") { + addProperties(t: {{t}} properties: {{ properties | safe }}) + } + } + "#; + + let ctx = context! { + path => self.path, + t => timestamp.into_time().t(), + properties => build_property_string(properties), + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("addProperties")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(()) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + pub async fn add_metadata(&self, properties: HashMap) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{ path }}") { + addMetadata(properties: {{ properties | safe }}) + } + } + "#; + + let ctx = context! { + path => self.path, + properties => build_property_string(properties), + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("addMetadata")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(()) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + pub async fn update_metadata( + &self, + properties: HashMap, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{ path }}") { + updateMetadata(properties: {{ properties | safe }}) + } + } + "#; + + let ctx = context! { + path => self.path, + properties => build_property_string(properties), + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("updateMetadata")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(()) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } + + /// Deletes an edge at the given time, src, dst and optional layer. + pub async fn delete_edge + ToString, T: IntoTime>( + &self, + timestamp: T, + src: G, + dst: G, + layer: Option, + ) -> Result { + let template = r#" + { + updateGraph(path: "{{ path }}") { + deleteEdge(time: {{ time }}, src: "{{ src }}", dst: "{{ dst }}" {% if layer is not none %}, layer: "{{ layer }}"{% endif %}) { + success + } + } + } + "#; + + let ctx = context! 
{ + path => self.path, + time => timestamp.into_time().t(), + src => src.to_string(), + dst => dst.to_string(), + layer => layer, + }; + + let query = build_query(template, ctx)?; + let res = self.client.query(&query, HashMap::new()).await?; + if res + .get("updateGraph") + .and_then(|x| x.as_object()) + .and_then(|x| x.get("deleteEdge")) + .and_then(|x| x.as_object()) + .and_then(|x| x.get("success")) + .and_then(|x| x.as_bool()) + .is_some_and(|x| x == true) + { + Ok(GraphQLRemoteEdge::new( + self.path.clone(), + self.client.clone(), + src.to_string(), + dst.to_string(), + )) + } else { + Err(ClientError::UnsuccessfulResponse) + } + } +} diff --git a/raphtory-graphql/src/client/remote_node.rs b/raphtory-graphql/src/client/remote_node.rs new file mode 100644 index 0000000000..66bfcd0607 --- /dev/null +++ b/raphtory-graphql/src/client/remote_node.rs @@ -0,0 +1,121 @@ +//! Pure Rust remote node client for GraphQL updateGraph.node(...) operations. + +use crate::client::{ + build_property_string, raphtory_client::RaphtoryGraphQLClient, remote_graph::build_query, + ClientError, +}; +use minijinja::context; +use raphtory_api::core::{ + entities::properties::prop::Prop, storage::timeindex::AsTime, utils::time::IntoTime, +}; +use std::collections::HashMap; + +/// Pure Rust remote node wrapper around `RaphtoryGraphQLClient`. +#[derive(Clone)] +pub struct GraphQLRemoteNode { + pub path: String, + pub client: RaphtoryGraphQLClient, + pub id: String, +} + +impl GraphQLRemoteNode { + pub fn new(path: String, client: RaphtoryGraphQLClient, id: String) -> Self { + Self { path, client, id } + } + + /// Set the type on the node. This only works if the type has not been previously set. + pub async fn set_node_type(&self, new_type: String) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + node(name: "{{name}}") { + setNodeType(newType: "{{new_type}}") + } + } + } + "#; + + let ctx = context! { + path => self.path, + name => self.id, + new_type => new_type + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Add temporal updates to the node at the specified time. + pub async fn add_updates( + &self, + t: T, + properties: Option>, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + node(name: "{{name}}") { + addUpdates(time: {{t}} {% if properties is not none %}, properties: {{ properties | safe }} {% endif %}) + } + } + } + "#; + + let ctx = context! { + path => self.path, + name => self.id, + t => t.into_time().t(), + properties => properties.map(|p| build_property_string(p)), + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Add metadata to the node (properties that do not change over time). + pub async fn add_metadata(&self, properties: HashMap) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + node(name: "{{name}}") { + addMetadata(properties: {{ properties | safe }} ) + } + } + } + "#; + + let ctx = context! { + path => self.path, + name => self.id, + properties => build_property_string(properties), + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } + + /// Update metadata of the node, overwriting existing values. 
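The node wrapper mirrors the edge one; continuing the earlier sketch's assumptions (running server, writable graph "g"):

```rust
use raphtory_graphql::client::raphtory_client::RaphtoryGraphQLClient;
use url::Url;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = RaphtoryGraphQLClient::new(Url::parse("http://localhost:1736")?, None);
    let node = client.remote_graph("g".to_owned()).node("a");

    node.set_node_type("person".to_owned()).await?; // only valid while untyped
    node.add_updates(1i64, None).await?;
    Ok(())
}
```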
+ pub async fn update_metadata( + &self, + properties: HashMap, + ) -> Result<(), ClientError> { + let template = r#" + { + updateGraph(path: "{{path}}") { + node(name: "{{name}}") { + updateMetadata(properties: {{ properties | safe }} ) + } + } + } + "#; + + let ctx = context! { + path => self.path, + name => self.id, + properties => build_property_string(properties) + }; + + let query = build_query(template, ctx).map_err(ClientError::from)?; + self.client.query(&query, HashMap::new()).await.map(|_| ()) + } +} diff --git a/raphtory-graphql/src/config/app_config.rs b/raphtory-graphql/src/config/app_config.rs index 9404d678e6..4157dd84df 100644 --- a/raphtory-graphql/src/config/app_config.rs +++ b/raphtory-graphql/src/config/app_config.rs @@ -1,6 +1,7 @@ use super::auth_config::{AuthConfig, PublicKeyError, PUBLIC_KEY_DECODING_ERR_MSG}; use crate::config::{ - cache_config::CacheConfig, log_config::LoggingConfig, otlp_config::TracingConfig, + cache_config::CacheConfig, concurrency_config::ConcurrencyConfig, log_config::LoggingConfig, + otlp_config::TracingConfig, schema_config::SchemaConfig, }; use config::{Config, ConfigError, File}; use serde::{Deserialize, Serialize}; @@ -16,6 +17,8 @@ pub struct AppConfig { pub cache: CacheConfig, pub tracing: TracingConfig, pub auth: AuthConfig, + pub concurrency: ConcurrencyConfig, + pub schema: SchemaConfig, pub public_dir: Option, #[cfg(feature = "search")] pub index: IndexConfig, @@ -26,6 +29,8 @@ pub struct AppConfigBuilder { cache: CacheConfig, tracing: TracingConfig, auth: AuthConfig, + concurrency: ConcurrencyConfig, + schema: SchemaConfig, public_dir: Option, #[cfg(feature = "search")] index: IndexConfig, @@ -38,6 +43,8 @@ impl From for AppConfigBuilder { cache: config.cache, tracing: config.tracing, auth: config.auth, + concurrency: config.concurrency, + schema: config.schema, public_dir: config.public_dir, #[cfg(feature = "search")] index: config.index, @@ -106,8 +113,66 @@ impl AppConfigBuilder { Ok(self) } - pub fn with_auth_enabled_for_reads(mut self, enabled_for_reads: bool) -> Self { - self.auth.enabled_for_reads = enabled_for_reads; + pub fn with_require_auth_for_reads(mut self, require_auth_for_reads: bool) -> Self { + self.auth.require_auth_for_reads = require_auth_for_reads; + self + } + + pub fn with_heavy_query_limit(mut self, heavy_query_limit: Option) -> Self { + self.concurrency.heavy_query_limit = heavy_query_limit; + self + } + + pub fn with_exclusive_writes(mut self, exclusive_writes: bool) -> Self { + self.concurrency.exclusive_writes = exclusive_writes; + self + } + + pub fn with_disable_batching(mut self, disable_batching: bool) -> Self { + self.concurrency.disable_batching = disable_batching; + self + } + + pub fn with_max_batch_size(mut self, max_batch_size: Option) -> Self { + self.concurrency.max_batch_size = max_batch_size; + self + } + + pub fn with_disable_lists(mut self, disable_lists: bool) -> Self { + self.concurrency.disable_lists = disable_lists; + self + } + + pub fn with_max_page_size(mut self, max_page_size: Option) -> Self { + self.concurrency.max_page_size = max_page_size; + self + } + + pub fn with_max_query_depth(mut self, max_query_depth: Option) -> Self { + self.schema.max_query_depth = max_query_depth; + self + } + + pub fn with_max_query_complexity(mut self, max_query_complexity: Option) -> Self { + self.schema.max_query_complexity = max_query_complexity; + self + } + + pub fn with_max_recursive_depth(mut self, max_recursive_depth: Option) -> Self { + self.schema.max_recursive_depth = 
max_recursive_depth; + self + } + + pub fn with_max_directives_per_field( + mut self, + max_directives_per_field: Option, + ) -> Self { + self.schema.max_directives_per_field = max_directives_per_field; + self + } + + pub fn with_disable_introspection(mut self, disable_introspection: bool) -> Self { + self.schema.disable_introspection = disable_introspection; self } @@ -128,6 +193,8 @@ impl AppConfigBuilder { cache: self.cache, tracing: self.tracing, auth: self.auth, + concurrency: self.concurrency, + schema: self.schema, public_dir: self.public_dir, #[cfg(feature = "search")] index: self.index, @@ -195,8 +262,46 @@ pub fn load_config( .with_auth_public_key(public_key) .map_err(|_| ConfigError::Message(PUBLIC_KEY_DECODING_ERR_MSG.to_owned()))?; } - if let Ok(enabled_for_reads) = settings.get::("auth.enabled_for_reads") { - app_config_builder = app_config_builder.with_auth_enabled_for_reads(enabled_for_reads); + if let Ok(require_auth_for_reads) = settings.get::("auth.require_auth_for_reads") { + app_config_builder = app_config_builder.with_require_auth_for_reads(require_auth_for_reads); + } + + if let Ok(heavy_query_limit) = settings.get::>("concurrency.heavy_query_limit") { + app_config_builder = app_config_builder.with_heavy_query_limit(heavy_query_limit); + } + if let Ok(exclusive_writes) = settings.get::("concurrency.exclusive_writes") { + app_config_builder = app_config_builder.with_exclusive_writes(exclusive_writes); + } + if let Ok(disable_batching) = settings.get::("concurrency.disable_batching") { + app_config_builder = app_config_builder.with_disable_batching(disable_batching); + } + if let Ok(max_batch_size) = settings.get::>("concurrency.max_batch_size") { + app_config_builder = app_config_builder.with_max_batch_size(max_batch_size); + } + if let Ok(disable_lists) = settings.get::("concurrency.disable_lists") { + app_config_builder = app_config_builder.with_disable_lists(disable_lists); + } + if let Ok(max_page_size) = settings.get::>("concurrency.max_page_size") { + app_config_builder = app_config_builder.with_max_page_size(max_page_size); + } + + if let Ok(max_query_depth) = settings.get::>("schema.max_query_depth") { + app_config_builder = app_config_builder.with_max_query_depth(max_query_depth); + } + if let Ok(max_query_complexity) = settings.get::>("schema.max_query_complexity") { + app_config_builder = app_config_builder.with_max_query_complexity(max_query_complexity); + } + if let Ok(max_recursive_depth) = settings.get::>("schema.max_recursive_depth") { + app_config_builder = app_config_builder.with_max_recursive_depth(max_recursive_depth); + } + if let Ok(max_directives_per_field) = + settings.get::>("schema.max_directives_per_field") + { + app_config_builder = + app_config_builder.with_max_directives_per_field(max_directives_per_field); + } + if let Ok(disable_introspection) = settings.get::("schema.disable_introspection") { + app_config_builder = app_config_builder.with_disable_introspection(disable_introspection); } if let Ok(public_dir) = settings.get::>("public_dir") { diff --git a/raphtory-graphql/src/config/auth_config.rs b/raphtory-graphql/src/config/auth_config.rs index 166429ad1f..8a29300cad 100644 --- a/raphtory-graphql/src/config/auth_config.rs +++ b/raphtory-graphql/src/config/auth_config.rs @@ -1,16 +1,52 @@ use base64::{prelude::BASE64_STANDARD, DecodeError, Engine}; -use jsonwebtoken::DecodingKey; +use jsonwebtoken::{Algorithm, DecodingKey}; use serde::{de, Deserialize, Deserializer, Serialize}; use spki::SubjectPublicKeyInfoRef; use std::fmt::Debug; 
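Taken together, the builder methods above let embedders wire the hardening knobs up in code. A sketch, assuming `AppConfigBuilder` has a `Default`-style constructor (not shown in this diff); the values are illustrative:

```rust
use raphtory_graphql::config::app_config::AppConfigBuilder;

fn main() {
    let config = AppConfigBuilder::default() // assumed constructor
        .with_require_auth_for_reads(true)
        .with_heavy_query_limit(Some(4))
        .with_exclusive_writes(true)
        .with_disable_batching(true)
        .with_max_page_size(Some(1_000))
        .with_disable_introspection(true)
        .build();
    let _ = config; // pass to GraphServer::new(...) in real use
}
```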
-pub const DEFAULT_AUTH_ENABLED_FOR_READS: bool = true; -pub const PUBLIC_KEY_DECODING_ERR_MSG: &str = "Could not successfully decode the public key. Make sure you use the standard alphabet with padding"; +pub const DEFAULT_REQUIRE_AUTH_FOR_READS: bool = true; +pub const PUBLIC_KEY_DECODING_ERR_MSG: &str = + "Could not decode public key. Provide a base64-encoded DER (X.509 SPKI) public key \ + for Ed25519 or RSA (2048-4096 bit)."; + +/// Describes one family of asymmetric public-key algorithms that Raphtory can validate JWTs with. +/// +/// To add support for a new algorithm family (e.g. EC/ECDSA), append one entry to +/// [`SUPPORTED_ALGORITHMS`] — no other code needs to change. +struct AlgorithmSpec { + /// X.509 SPKI algorithm OID string (e.g. `"1.3.101.112"` for Ed25519). + oid: &'static str, + /// Constructs the `DecodingKey` from the raw subject-public-key bytes extracted from the SPKI + /// structure (i.e. the inner key bytes, not the full DER-encoded SPKI wrapper). + make_key: fn(&[u8]) -> DecodingKey, + /// JWT algorithms accepted for this key family. All listed variants are allowed during + /// validation; the first entry is used as the `Validation` default. + algorithms: &'static [Algorithm], +} + +/// Registry of supported public-key algorithm families. +/// +/// # Adding a new family +/// Append an [`AlgorithmSpec`] entry here. `TryFrom for PublicKey` will pick it up +/// automatically — no other changes required. +const SUPPORTED_ALGORITHMS: &[AlgorithmSpec] = &[ + AlgorithmSpec { + oid: "1.3.101.112", // id-EdDSA (Ed25519) + make_key: DecodingKey::from_ed_der, + algorithms: &[Algorithm::EdDSA], + }, + AlgorithmSpec { + oid: "1.2.840.113549.1.1.1", // rsaEncryption (PKCS#1) + make_key: DecodingKey::from_rsa_der, + algorithms: &[Algorithm::RS256, Algorithm::RS384, Algorithm::RS512], + }, +]; #[derive(Clone)] pub struct PublicKey { source: String, pub(crate) decoding_key: DecodingKey, + pub(crate) algorithms: Vec, } impl PartialEq for PublicKey { @@ -23,8 +59,10 @@ impl PartialEq for PublicKey { pub enum PublicKeyError { #[error(transparent)] Base64(#[from] DecodeError), - #[error("The provided key is not a a valid X.509 Subject Public Key Info ASN.1 structure")] + #[error("The provided key is not a valid X.509 Subject Public Key Info ASN.1 structure")] Spki, + #[error("Key algorithm is not supported; see SUPPORTED_ALGORITHMS for accepted OIDs")] + UnsupportedAlgorithm, } impl TryFrom for PublicKey { @@ -33,10 +71,16 @@ impl TryFrom for PublicKey { let der = BASE64_STANDARD.decode(&value)?; let spki_ref = SubjectPublicKeyInfoRef::try_from(der.as_ref()).map_err(|_| PublicKeyError::Spki)?; - let decoding_key = DecodingKey::from_ed_der(spki_ref.subject_public_key.raw_bytes()); + let oid = spki_ref.algorithm.oid.to_string(); + let spec = SUPPORTED_ALGORITHMS + .iter() + .find(|s| s.oid == oid.as_str()) + .ok_or(PublicKeyError::UnsupportedAlgorithm)?; + let raw = spki_ref.subject_public_key.raw_bytes(); Ok(Self { source: value, - decoding_key, + decoding_key: (spec.make_key)(raw), + algorithms: spec.algorithms.to_vec(), }) } } @@ -69,14 +113,14 @@ impl Debug for PublicKey { #[derive(Debug, Deserialize, Clone, Serialize, PartialEq)] pub struct AuthConfig { pub public_key: Option, - pub enabled_for_reads: bool, + pub require_auth_for_reads: bool, } impl Default for AuthConfig { fn default() -> Self { Self { public_key: None, - enabled_for_reads: DEFAULT_AUTH_ENABLED_FOR_READS, + require_auth_for_reads: DEFAULT_REQUIRE_AUTH_FOR_READS, } } } diff --git 
a/raphtory-graphql/src/config/concurrency_config.rs b/raphtory-graphql/src/config/concurrency_config.rs new file mode 100644 index 0000000000..5e7d9f754f --- /dev/null +++ b/raphtory-graphql/src/config/concurrency_config.rs @@ -0,0 +1,40 @@ +use serde::{Deserialize, Serialize}; + +pub const DEFAULT_EXCLUSIVE_WRITES: bool = false; +pub const DEFAULT_DISABLE_BATCHING: bool = false; +pub const DEFAULT_DISABLE_LISTS: bool = false; + +/// Controls how Raphtory schedules concurrent GraphQL work. +#[derive(Debug, Default, Deserialize, PartialEq, Clone, Serialize)] +pub struct ConcurrencyConfig { + /// Restricts how many expensive graph traversal queries can execute simultaneously. + /// Covers operations like connected components, edge traversals, and neighbour lookups + /// (outComponent, inComponent, edges, outEdges, inEdges, neighbours, outNeighbours, + /// inNeighbours). Once the limit is exceeded, queries are parked on a semaphore and + /// wait until a slot becomes available before executing. `None` means unlimited. + pub heavy_query_limit: Option, + + /// Ensures only one ingestion/write operation runs at a time and blocks reads until + /// it completes. + pub exclusive_writes: bool, + + /// When true, query batching (sending multiple queries in a single HTTP request) is + /// rejected outright. Batching can otherwise be used to circumvent per-request depth + /// and complexity limits. + pub disable_batching: bool, + + /// Caps the number of queries accepted in a single batched HTTP request. Requests + /// whose batch exceeds this size are rejected. `None` means unlimited (subject to + /// `disable_batching`). + pub max_batch_size: Option, + + /// When true, completely disables bulk list endpoints (e.g. `list` on a collection). + /// Essential for large graphs where unbounded list queries could return billions of + /// results and exhaust server resources. Clients should use `page` instead. + pub disable_lists: bool, + + /// Maximum page size enforced on paged collection queries. Caps the `limit` argument + /// of `page` so clients can't circumvent `disable_lists` by requesting huge pages. + /// `None` means unlimited. + pub max_page_size: Option, +} diff --git a/raphtory-graphql/src/config/mod.rs b/raphtory-graphql/src/config/mod.rs index e0ae764243..9ef7311dec 100644 --- a/raphtory-graphql/src/config/mod.rs +++ b/raphtory-graphql/src/config/mod.rs @@ -1,10 +1,12 @@ pub mod app_config; pub mod auth_config; pub mod cache_config; +pub mod concurrency_config; #[cfg(feature = "search")] pub mod index_config; pub mod log_config; pub mod otlp_config; +pub mod schema_config; #[cfg(test)] mod tests { diff --git a/raphtory-graphql/src/config/schema_config.rs b/raphtory-graphql/src/config/schema_config.rs new file mode 100644 index 0000000000..b327617bb6 --- /dev/null +++ b/raphtory-graphql/src/config/schema_config.rs @@ -0,0 +1,30 @@ +use serde::{Deserialize, Serialize}; + +pub const DEFAULT_DISABLE_INTROSPECTION: bool = false; + +/// Controls GraphQL schema-level protections applied when the server builds its schema. +#[derive(Debug, Default, Deserialize, PartialEq, Clone, Serialize)] +pub struct SchemaConfig { + /// Limits how deeply nested a query can be. For example, a query like + /// graph → nodes → page → edges → page → destination → edges → page → destination + /// would have a depth of 9. `None` means unlimited. + pub max_query_depth: Option, + + /// Limits the total estimated cost of a query based on the number of fields selected. 
+ /// Blocks queries that try to fetch too much data in one request. `None` means unlimited. + pub max_query_complexity: Option, + + /// Internal safety limit to prevent stack overflows from pathologically structured + /// queries. `None` falls back to the async-graphql default of 32. + pub max_recursive_depth: Option, + + /// Limits the number of GraphQL directives on any single field. Directives are + /// annotations prefixed with @ that modify how a field is executed (e.g. @skip, + /// @include, @deprecated). This prevents directive-based abuse. `None` means unlimited. + pub max_directives_per_field: Option, + + /// When true, schema introspection is fully disabled, preventing clients from + /// discovering the API's structure and available fields. Recommended for production + /// to reduce the attack surface. + pub disable_introspection: bool, +} diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 7fbad3cc53..ce4434be96 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -1,68 +1,178 @@ use crate::{ + auth_policy::AuthorizationPolicy, config::app_config::AppConfig, graph::GraphWithVectors, model::blocking_io, - paths::{valid_path, ExistingGraphFolder, ValidGraphFolder}, + paths::{ + mark_dirty, ExistingGraphFolder, InternalPathValidationError, PathValidationError, + ValidGraphPaths, ValidWriteableGraphFolder, + }, + rayon::blocking_compute, + GQLError, }; -use itertools::Itertools; +use futures_util::FutureExt; use moka::future::Cache; use raphtory::{ - db::api::view::MaterializedGraph, - errors::{GraphError, GraphResult, InvalidPathReason}, - prelude::CacheOps, + db::api::{storage::storage::Config, view::MaterializedGraph}, + errors::GraphError, + serialise::GraphPaths, vectors::{ - cache::CachedEmbeddingModel, storage::LazyDiskVectorCache, template::DocumentTemplate, - vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, + cache::{CachedEmbeddingModel, VectorCache}, + storage::LazyDiskVectorCache, + template::DocumentTemplate, + vectorisable::Vectorisable, + vectorised_graph::VectorisedGraph, }, }; use std::{ + collections::HashMap, + fs, io, + io::{Read, Seek}, + ops::{Deref, DerefMut}, path::{Path, PathBuf}, sync::Arc, }; -use tokio::fs; use tracing::{error, warn}; use walkdir::WalkDir; +pub const DIRTY_PATH: &'static str = ".dirty"; + +#[derive(thiserror::Error, Debug)] +pub enum MutationErrorInner { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + IO(#[from] io::Error), + #[error(transparent)] + InvalidInternal(#[from] InternalPathValidationError), +} + +#[derive(thiserror::Error, Debug)] +pub enum InsertionError { + #[error("Failed to insert graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), + #[error("Failed to insert graph {graph}: {error}")] + GraphError { graph: String, error: GraphError }, +} + +impl InsertionError { + pub fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + InsertionError::Insertion { + graph: graph.to_string(), + error, + } + } + + pub fn from_graph_err(graph: &str, error: GraphError) -> Self { + InsertionError::GraphError { + graph: graph.to_string(), + error, + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum DeletionError { + #[error("Failed to delete graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), +} + 
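Stepping back to `ConcurrencyConfig`: the "parked on a semaphore" behaviour described for `heavy_query_limit` can be pictured with a small tokio sketch. `HeavyQueryGate` and its field names are hypothetical stand-ins, not raphtory's internals, which this diff does not show:

    use std::{future::Future, sync::Arc};
    use tokio::sync::Semaphore;

    // Hypothetical gate modelling the documented behaviour.
    struct HeavyQueryGate {
        permits: Option<Arc<Semaphore>>, // `None` means unlimited
    }

    impl HeavyQueryGate {
        fn new(limit: Option<usize>) -> Self {
            Self {
                permits: limit.map(|n| Arc::new(Semaphore::new(n))),
            }
        }

        // Heavy resolvers wrap their work in `run`: once all permits are
        // taken, further callers park here until a slot frees up.
        async fn run<T>(&self, work: impl Future<Output = T>) -> T {
            match &self.permits {
                Some(sem) => {
                    // `acquire` only errors if the semaphore is closed,
                    // which this sketch never does.
                    let _permit = sem.acquire().await.expect("semaphore closed");
                    work.await
                }
                None => work.await,
            }
        }
    }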
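The `SchemaConfig` fields, likewise, line up with guards that recent async-graphql releases expose on the schema builder (`limit_depth`, `limit_complexity`, `limit_recursive_depth`, `limit_directives`, `disable_introspection`; note the recursive-depth default of 32 matches the doc comment above). The hunks here only define the config, so the wiring below is a sketch, not raphtory's actual code, and raphtory builds its schema dynamically; the static builder is used only to keep the example short. `usize` is again assumed for the elided limit types:

    use async_graphql::{EmptyMutation, EmptySubscription, Object, SchemaBuilder};
    use raphtory_graphql::config::schema_config::SchemaConfig;

    struct Query;

    #[Object]
    impl Query {
        async fn ping(&self) -> &'static str {
            "pong"
        }
    }

    // Apply each optional limit only when set, keeping async-graphql's own
    // defaults (e.g. recursive depth 32) otherwise.
    fn apply_schema_limits(
        mut builder: SchemaBuilder<Query, EmptyMutation, EmptySubscription>,
        cfg: &SchemaConfig,
    ) -> SchemaBuilder<Query, EmptyMutation, EmptySubscription> {
        if let Some(depth) = cfg.max_query_depth {
            builder = builder.limit_depth(depth);
        }
        if let Some(complexity) = cfg.max_query_complexity {
            builder = builder.limit_complexity(complexity);
        }
        if let Some(depth) = cfg.max_recursive_depth {
            builder = builder.limit_recursive_depth(depth);
        }
        if let Some(count) = cfg.max_directives_per_field {
            builder = builder.limit_directives(count);
        }
        if cfg.disable_introspection {
            builder = builder.disable_introspection();
        }
        builder
    }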
+#[derive(thiserror::Error, Debug)] +pub enum MoveError { + #[error("Failed to move graph: {0}")] + Insertion(#[from] InsertionError), + #[error("Failed to move graph: {0}")] + Deletion(#[from] DeletionError), +} + +impl DeletionError { + fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + DeletionError::Insertion { + graph: graph.to_string(), + error, + } + } +} + +/// Get relative path as String joined with `"/"` for use with the validation methods. +/// The path is not validated here! pub(crate) fn get_relative_path( - work_dir: PathBuf, + work_dir: &Path, path: &Path, - namespace: bool, -) -> Result { - let path_buf = path.strip_prefix(work_dir.clone())?.to_path_buf(); - let components = path_buf - .components() - .into_iter() - .map(|c| { - c.as_os_str() - .to_str() - .ok_or(InvalidPathReason::NonUTFCharacters) - }) - .collect::, _>>()?; - let path_str = components.into_iter().join("/"); - valid_path(work_dir, &path_str, namespace)?; +) -> Result { + let relative = path.strip_prefix(work_dir)?; + let mut path_str = String::new(); + let mut components = relative.components().map(|component| { + component + .as_os_str() + .to_str() + .ok_or(InternalPathValidationError::NonUTFCharacters) + }); + if let Some(first) = components.next() { + path_str.push_str(first?); + } + for component in components { + path_str.push('/'); + path_str.push_str(component?); + } Ok(path_str) } +/// Inner struct with a drop implementation that cleans up the graphs +pub struct DataInner { + pub(crate) work_dir: PathBuf, + pub(crate) cache: Cache, + pub(crate) vector_cache: LazyDiskVectorCache, + pub(crate) graph_conf: Config, + pub(crate) auth_policy: Option>, +} + +/// Outer data struct that wraps the inner data to make sure it is only dropped once #[derive(Clone)] pub struct Data { - pub(crate) work_dir: PathBuf, // TODO: move this to config? - pub(crate) cache: Cache, - pub(crate) create_index: bool, // TODO: move this to config? - pub(crate) vector_cache: LazyDiskVectorCache, + inner: Arc, + pub(crate) create_index: bool, +} + +impl Deref for Data { + type Target = DataInner; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } } impl Data { - pub fn new(work_dir: &Path, configs: &AppConfig) -> Self { + pub fn new(work_dir: &Path, configs: &AppConfig, graph_conf: Config) -> Self { let cache_configs = &configs.cache; - let cache = Cache::::builder() + let cache = Cache::::builder() .max_capacity(cache_configs.capacity) .time_to_idle(std::time::Duration::from_secs(cache_configs.tti_seconds)) - .eviction_listener(|_, graph, _| { - graph - .write_updates() - .unwrap_or_else(|err| error!("Write on eviction failed: {err:?}")) - // FIXME: don't have currently a way to know which embedding updates are pending + .async_eviction_listener(|_, graph, cause| { + // The eviction listener gets called any time a graph is removed from the cache, + // not just when it is evicted. Only serialize on evictions. + async move { + if !cause.was_evicted() { + return; + } + if let Err(e) = + blocking_compute(move || graph.folder.replace_graph_data(graph.graph)).await + { + error!("Error encoding graph to disk on eviction: {e}"); + } + } + .boxed() }) .build(); @@ -74,62 +184,119 @@ impl Data { // TODO: make vector feature optional? 
Self { - work_dir: work_dir.to_path_buf(), - cache, + inner: Arc::new(DataInner { + work_dir: work_dir.to_path_buf(), + cache, + vector_cache: LazyDiskVectorCache::new(work_dir.join(".vector-cache")), + graph_conf, + auth_policy: None, + }), create_index, - vector_cache: LazyDiskVectorCache::new(work_dir.join(".vector-cache")), } } - pub async fn get_graph( + pub(crate) fn set_auth_policy(&mut self, policy: Arc) { + Arc::get_mut(&mut self.inner) + .expect("Data is not uniquely owned when setting auth_policy") + .auth_policy = Some(policy); + } + + async fn invalidate(&self, path: &str) { + self.cache.invalidate(path).await; + self.cache.run_pending_tasks().await; // make sure the item is actually dropped + } + + pub fn validate_path_for_insert( &self, path: &str, - ) -> Result<(GraphWithVectors, ExistingGraphFolder), Arc> { - let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - let graph_folder_clone = graph_folder.clone(); + overwrite: bool, + ) -> Result { + if overwrite { + ValidWriteableGraphFolder::try_existing_or_new(self.work_dir.clone(), path) + } else { + ValidWriteableGraphFolder::try_new(self.work_dir.clone(), path) + } + } + + pub async fn get_graph(&self, path: &str) -> Result> { self.cache - .try_get_with(path.into(), self.read_graph_from_folder(graph_folder_clone)) + .try_get_with(path.into(), self.read_graph_from_disk(path)) .await - .map(|graph| (graph, graph_folder)) + } + + pub async fn get_cached_graph(&self, path: &str) -> Option { + self.cache.get(path).await + } + + pub fn has_graph(&self, path: &str) -> bool { + self.cache.contains_key(path) + || ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() } pub async fn insert_graph( &self, - path: &str, + writeable_folder: ValidWriteableGraphFolder, graph: MaterializedGraph, - ) -> Result<(), GraphError> { - // TODO: replace ValidGraphFolder with ValidNonExistingGraphFolder !!!!!!!!! - // or even a NewGraphFolder, so that we try to create the graph file and if that is sucessful - // we can write to it and its guaranteed to me atomic - let folder = ValidGraphFolder::try_from(self.work_dir.clone(), path)?; - match ExistingGraphFolder::try_from(self.work_dir.clone(), path) { - Ok(_) => Err(GraphError::GraphNameAlreadyExists(folder.to_error_path())), - Err(_) => { - fs::create_dir_all(folder.get_base_path()).await?; - let folder_clone = folder.clone(); - let graph_clone = graph.clone(); - blocking_io(move || graph_clone.cache(folder_clone)).await?; - let graph = GraphWithVectors::new(graph, None); - graph - .folder - .get_or_try_init(|| Ok::<_, GraphError>(folder.into()))?; - self.cache.insert(path.into(), graph).await; - Ok(()) - } - } + ) -> Result<(), InsertionError> { + self.invalidate(writeable_folder.local_path()).await; + let config = self.graph_conf.clone(); + let graph = blocking_compute(move || { + writeable_folder.write_graph_data(graph.clone(), config)?; + let folder = writeable_folder.finish()?; + let graph = GraphWithVectors::new(graph, None, folder.as_existing()?); + Ok::<_, InsertionError>(graph) + }) + .await?; + self.cache + .insert(graph.folder.local_path().into(), graph) + .await; + Ok(()) + } + + /// Insert a graph serialized from a graph folder. 
+ pub async fn insert_graph_as_bytes( + &self, + folder: ValidWriteableGraphFolder, + bytes: R, + ) -> Result<(), InsertionError> { + self.invalidate(folder.local_path()).await; + let conf = self.graph_conf.clone(); + blocking_io(move || { + folder.write_graph_bytes(bytes, conf)?; + folder.finish() + }) + .await?; + Ok(()) } - pub async fn delete_graph(&self, path: &str) -> Result<(), GraphError> { + async fn delete_graph_inner( + &self, + graph_folder: ExistingGraphFolder, + ) -> Result<(), MutationErrorInner> { + let dirty_file = mark_dirty(graph_folder.root())?; + self.invalidate(graph_folder.local_path()).await; + blocking_io(move || { + fs::remove_dir_all(graph_folder.root())?; + fs::remove_file(dirty_file)?; + Ok::<_, MutationErrorInner>(()) + }) + .await?; + Ok(()) + } + + pub async fn delete_graph(&self, path: &str) -> Result<(), DeletionError> { let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - fs::remove_dir_all(graph_folder.get_base_path()).await?; - self.cache.remove(&PathBuf::from(path)).await; + self.delete_graph_inner(graph_folder) + .await + .map_err(|err| DeletionError::from_inner(path, err))?; + self.cache.remove(path).await; Ok(()) } async fn vectorise_with_template( &self, graph: MaterializedGraph, - folder: &ValidGraphFolder, + folder: &impl ValidGraphPaths, template: &DocumentTemplate, model: CachedEmbeddingModel, ) -> Option> { @@ -137,14 +304,14 @@ impl Data { .vectorise( model, template.clone(), - Some(&folder.get_vectors_path()), + Some(&folder.graph_folder().vectors_path().ok()?), true, // verbose ) .await; match vectors { Ok(vectors) => Some(vectors), Err(error) => { - let name = folder.get_original_path_str(); + let name = folder.local_path(); warn!("An error occurred when trying to vectorise graph {name}: {error}"); None } @@ -156,160 +323,92 @@ impl Data { folder: &ExistingGraphFolder, template: &DocumentTemplate, model: CachedEmbeddingModel, - ) -> GraphResult<()> { - let graph = self.read_graph_from_folder(folder.clone()).await?.graph; - self.vectorise_with_template(graph, folder, template, model) - .await; - self.cache - .remove(&folder.get_original_path().to_path_buf()) + ) -> Result<(), GQLError> { + let graph = match self.get_cached_graph(folder.local_path()).await { + None => self.read_graph_from_disk_inner(folder.clone()).await?, + Some(graph) => graph, + }; + self.vectorise_with_template(graph.graph, folder, template, model) .await; + self.cache.remove(folder.local_path()).await; Ok(()) } - // TODO: return iter - pub fn get_all_graph_folders(&self) -> Vec { + pub fn get_all_graph_folders(&self) -> impl Iterator { let base_path = self.work_dir.clone(); WalkDir::new(&self.work_dir) .into_iter() - .filter_map(|e| { + .filter_map(move |e| { let entry = e.ok()?; let path = entry.path(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; + let relative = get_relative_path(&base_path, path).ok()?; let folder = ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; Some(folder) }) - .collect() } - async fn read_graph_from_folder( + async fn read_graph_from_disk_inner( &self, folder: ExistingGraphFolder, ) -> Result { - GraphWithVectors::read_from_folder(&folder, &self.vector_cache, self.create_index).await - // FIXME: I need some blocking_io inside of GraphWithVectors::read_from_folder + let create_index = self.create_index; + let config = self.graph_conf.clone(); + let cache = self.vector_cache.clone(); + GraphWithVectors::read_from_folder(&folder, &cache, create_index, config).await } -} 
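One detail worth calling out in `delete_graph_inner` above: the ordering is deliberate. `mark_dirty` drops a sentinel before anything is removed, so a crash mid-delete leaves a marker identifying the half-removed folder instead of leaving something that still parses as a valid graph. And because `fs::remove_file(dirty_file)` runs after `fs::remove_dir_all` and its error propagates, the sentinel must live outside the graph folder itself. A sketch under that assumption (the real `mark_dirty` and `DIRTY_PATH` layout may differ):

    use std::{fs, io, path::Path};

    // Hypothetical stand-in for mark_dirty + delete_graph_inner's ordering.
    fn delete_graph_folder(root: &Path) -> io::Result<()> {
        // 1. Sentinel first. It sits *beside* the folder (e.g. `g1.dirty`
        //    next to `g1/`) so step 2 cannot delete it prematurely.
        let dirty = root.with_extension("dirty");
        fs::write(&dirty, b"")?;

        // 2. Remove the graph data. A crash here leaves the sentinel
        //    behind, so loaders can skip or clean up the flagged folder.
        fs::remove_dir_all(root)?;

        // 3. Only a fully deleted graph loses its sentinel.
        fs::remove_file(&dirty)?;
        Ok(())
    }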
-#[cfg(test)] -pub(crate) mod data_tests { - use super::ValidGraphFolder; - use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, - data::Data, - }; - use itertools::Itertools; - use raphtory::{db::api::view::MaterializedGraph, errors::GraphError, prelude::*}; - use std::{collections::HashMap, fs, fs::File, io, path::Path, time::Duration}; - use tokio::time::sleep; + async fn read_graph_from_disk(&self, path: &str) -> Result { + let folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; + Ok(self.read_graph_from_disk_inner(folder).await?) + } +} - #[cfg(feature = "storage")] - use raphtory_storage::{core_ops::CoreGraphOps, graph::graph::GraphStorage}; - - #[cfg(feature = "storage")] - fn copy_dir_recursive(source_dir: &Path, target_dir: &Path) -> Result<(), GraphError> { - fs::create_dir_all(target_dir)?; - for entry in fs::read_dir(source_dir)? { - let entry = entry?; - let entry_path = entry.path(); - let target_path = target_dir.join(entry.file_name()); - - if entry_path.is_dir() { - copy_dir_recursive(&entry_path, &target_path)?; - } else { - fs::copy(&entry_path, &target_path)?; +impl Drop for DataInner { + fn drop(&mut self) { + // On drop, serialize graphs that don't have underlying storage. + for (_, graph) in self.cache.iter() { + if graph.is_dirty() { + if let Err(e) = graph.folder.replace_graph_data(graph.graph) { + error!("Error encoding graph to disk on drop: {e}"); + } } } - Ok(()) } +} - // This function creates files that mimic disk graph for tests - fn create_ipc_files_in_dir(dir_path: &Path) -> io::Result<()> { - if !dir_path.exists() { - fs::create_dir_all(dir_path)?; - } - - let file_paths = ["file1.ipc", "file2.txt", "file3.ipc"]; - - for &file_name in &file_paths { - let file_path = dir_path.join(file_name); - File::create(file_path)?; - } - - Ok(()) - } +#[cfg(test)] +pub(crate) mod data_tests { + use super::InsertionError; + use crate::{config::app_config::AppConfigBuilder, data::Data}; + use itertools::Itertools; + use raphtory::{ + db::api::view::{internal::InternalStorageOps, MaterializedGraph}, + prelude::*, + serialise::GraphPaths, + }; + use std::{collections::HashMap, fs, path::Path, time::Duration}; + use tokio::time::sleep; fn create_graph_folder(path: &Path) { + // Use empty graph to create folder structure fs::create_dir_all(path).unwrap(); - File::create(path.join(".raph")).unwrap(); - File::create(path.join("graph")).unwrap(); + let graph = Graph::new(); + graph.encode(path).unwrap(); } pub(crate) async fn save_graphs_to_work_dir( - work_dir: &Path, + data: &Data, graphs: &HashMap, - ) -> Result<(), GraphError> { + ) -> Result<(), InsertionError> { for (name, graph) in graphs.into_iter() { - let data = Data::new(work_dir, &AppConfig::default()); - let folder = ValidGraphFolder::try_from(data.work_dir, name)?; - - #[cfg(feature = "storage")] - if let GraphStorage::Disk(dg) = graph.core_graph() { - let disk_graph_path = dg.graph_dir(); - copy_dir_recursive(disk_graph_path, &folder.get_graph_path())?; - File::create(folder.get_meta_path())?; - } else { - graph.encode(folder)?; - } - - #[cfg(not(feature = "storage"))] - graph.encode(folder)?; + let folder = data.validate_path_for_insert(name, true)?; + data.insert_graph(folder, graph.clone()).await?; } Ok(()) } - #[tokio::test] - #[cfg(feature = "storage")] - async fn test_get_disk_graph_from_path() { - let tmp_graph_dir = tempfile::tempdir().unwrap(); - - let graph = Graph::new(); - graph - .add_edge(0, 1, 2, [("name", "test_e1")], None) - .unwrap(); - graph - .add_edge(0, 1, 3, 
[("name", "test_e2")], None) - .unwrap(); - - let base_path = tmp_graph_dir.path().to_owned(); - let graph_path = base_path.join("test_dg"); - fs::create_dir(&graph_path).unwrap(); - File::create(graph_path.join(".raph")).unwrap(); - let _ = DiskGraphStorage::from_graph(&graph, &graph_path.join("graph")).unwrap(); - - let data = Data::new(&base_path, &Default::default()); - let res = data.get_graph("test_dg").await.unwrap().0; - assert_eq!(res.graph.into_events().unwrap().count_edges(), 2); - - // Dir path doesn't exists - let res = data.get_graph("test_dg1").await; - assert!(res.is_err()); - if let Err(err) = res { - assert!(err.to_string().contains("Graph not found")); - } - - // Dir path exists but is not a disk graph path - // let tmp_graph_dir = tempfile::tempdir().unwrap(); - // let res = read_graph_from_path(base_path, ""); - let res = data.get_graph("").await; - assert!(res.is_err()); - if let Err(err) = res { - assert!(err.to_string().contains("Graph not found")); - } - } - #[tokio::test] async fn test_save_graphs_to_work_dir() { - let tmp_graph_dir = tempfile::tempdir().unwrap(); let tmp_work_dir = tempfile::tempdir().unwrap(); let graph = Graph::new(); @@ -321,26 +420,14 @@ pub(crate) mod data_tests { .add_edge(0, 1, 3, [("name", "test_e2")], None) .unwrap(); - #[cfg(feature = "storage")] - let graph2: MaterializedGraph = graph - .persist_as_disk_graph(tmp_graph_dir.path()) - .unwrap() - .into(); - let graph: MaterializedGraph = graph.into(); let mut graphs = HashMap::new(); graphs.insert("test_g".to_string(), graph); + let data = Data::new(tmp_work_dir.path(), &Default::default(), Default::default()); - #[cfg(feature = "storage")] - graphs.insert("test_dg".to_string(), graph2); - - save_graphs_to_work_dir(tmp_work_dir.path(), &graphs) - .await - .unwrap(); - - let data = Data::new(tmp_work_dir.path(), &Default::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); for graph in graphs.keys() { assert!(data.get_graph(graph).await.is_ok(), "could not get {graph}") @@ -367,23 +454,23 @@ pub(crate) mod data_tests { .with_cache_tti_seconds(2) .build(); - let data = Data::new(tmp_work_dir.path(), &configs); + let data = Data::new(tmp_work_dir.path(), &configs, Default::default()); - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); // Test size based eviction data.get_graph("test_g2").await.unwrap(); - assert!(data.cache.contains_key(Path::new("test_g2"))); - assert!(!data.cache.contains_key(Path::new("test_g"))); + assert!(data.cache.contains_key("test_g2")); + assert!(!data.cache.contains_key("test_g")); data.get_graph("test_g").await.unwrap(); // wait for any eviction data.cache.run_pending_tasks().await; assert_eq!(data.cache.iter().count(), 1); sleep(Duration::from_secs(3)).await; - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); // FIXME: this test is not doing anything because calling cache.contains_key() runs // any pending evictions. 
To actually test it we need this assertion: // assert_eq!(data.cache.entry_count(), 0); @@ -398,6 +485,7 @@ pub(crate) mod data_tests { async fn test_get_graph_paths() { let temp_dir = tempfile::tempdir().unwrap(); let work_dir = temp_dir.path(); + let g0_path = work_dir.join("g0"); let g1_path = work_dir.join("g1"); let g2_path = work_dir.join("shivam/investigations/2024-12-22/g2"); @@ -411,14 +499,13 @@ pub(crate) mod data_tests { create_graph_folder(&g1_path); create_graph_folder(&g2_path); create_graph_folder(&g3_path); + create_graph_folder(&g4_path); create_graph_folder(&g7_path); - fs::create_dir_all(&g4_path.join("graph")).unwrap(); - File::create(g4_path.join(".raph")).unwrap(); - create_ipc_files_in_dir(&g4_path.join("graph")).unwrap(); - + // Empty, non-graph folder fs::create_dir_all(&g5_path).unwrap(); + // Simulate non-graph folder with random files fs::create_dir_all(&g6_path).unwrap(); fs::write(g6_path.join("random-file"), "some-random-content").unwrap(); @@ -427,12 +514,12 @@ pub(crate) mod data_tests { .with_cache_tti_seconds(2) .build(); - let data = Data::new(work_dir, &configs); + let data = Data::new(work_dir, &configs, Default::default()); let paths = data .get_all_graph_folders() .into_iter() - .map(|folder| folder.get_base_path().to_path_buf()) + .map(|folder| folder.0.root().to_path_buf()) .collect_vec(); assert_eq!(paths.len(), 5); @@ -441,14 +528,187 @@ pub(crate) mod data_tests { assert!(paths.contains(&g2_path)); assert!(paths.contains(&g3_path)); assert!(paths.contains(&g4_path)); - assert!(!paths.contains(&g5_path)); // Empty dir is ignored + assert!(!paths.contains(&g5_path)); // Empty folder is ignored + assert!(!paths.contains(&g6_path)); // Non-graph folder is ignored assert!(!paths.contains(&g7_path)); // Hidden path is ignored assert!(data .get_graph("shivam/investigations/2024-12-22/g2") .await .is_ok()); + assert!(data.get_graph("some/random/path").await.is_err()); assert!(data.get_graph(".graph").await.is_err()); } + + #[tokio::test] + async fn test_drop_skips_write_when_graph_is_not_dirty() { + let tmp_work_dir = tempfile::tempdir().unwrap(); + + // Create two graphs and save them to disk + let graph1 = Graph::new(); + graph1 + .add_edge(0, 1, 2, [("name", "test_e1")], None) + .unwrap(); + graph1 + .add_edge(0, 1, 3, [("name", "test_e2")], None) + .unwrap(); + + let graph2 = Graph::new(); + graph2 + .add_edge(0, 2, 3, [("name", "test_e3")], None) + .unwrap(); + graph2 + .add_edge(0, 2, 4, [("name", "test_e4")], None) + .unwrap(); + + let graph1_path = tmp_work_dir.path().join("test_graph1"); + let graph2_path = tmp_work_dir.path().join("test_graph2"); + graph1.encode(&graph1_path).unwrap(); + graph2.encode(&graph2_path).unwrap(); + + // Record modification times before any operations + let graph1_metadata = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata = fs::metadata(&graph2_path).unwrap(); + let graph1_original_time = graph1_metadata.modified().unwrap(); + let graph2_original_time = graph2_metadata.modified().unwrap(); + + let configs = AppConfigBuilder::new() + .with_cache_capacity(10) + .with_cache_tti_seconds(300) + .build(); + + let data = Data::new(tmp_work_dir.path(), &configs, Default::default()); + + let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); + + // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! 
+ if loaded_graph1.graph.disk_storage_path().is_some() { + assert!( + !loaded_graph1.is_dirty(), + "Graph1 should not be dirty when loaded from disk" + ); + assert!( + !loaded_graph2.is_dirty(), + "Graph2 should not be dirty when loaded from disk" + ); + + // Modify only graph1 to make it dirty + loaded_graph1.set_dirty(true); + assert!( + loaded_graph1.is_dirty(), + "Graph1 should be dirty after modification" + ); + + // Drop the Data instance - this should trigger serialization + drop(data); + + // Check modification times after drop + let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); + let graph1_modified_time = graph1_metadata_after.modified().unwrap(); + let graph2_modified_time = graph2_metadata_after.modified().unwrap(); + + // Graph1 (dirty) modification time should be different + assert_ne!( + graph1_original_time, graph1_modified_time, + "Graph1 (dirty) should have been written to disk on drop" + ); + + // Graph2 (not dirty) modification time should be the same + assert_eq!( + graph2_original_time, graph2_modified_time, + "Graph2 (not dirty) should not have been written to disk on drop" + ); + } + } + + #[tokio::test] + async fn test_eviction_skips_write_when_graph_is_not_dirty() { + let tmp_work_dir = tempfile::tempdir().unwrap(); + + // Create two graphs and save them to disk + let graph1 = Graph::new(); + graph1 + .add_edge(0, 1, 2, [("name", "test_e1")], None) + .unwrap(); + graph1 + .add_edge(0, 1, 3, [("name", "test_e2")], None) + .unwrap(); + + let graph2 = Graph::new(); + graph2 + .add_edge(0, 2, 3, [("name", "test_e3")], None) + .unwrap(); + graph2 + .add_edge(0, 2, 4, [("name", "test_e4")], None) + .unwrap(); + + let graph1_path = tmp_work_dir.path().join("test_graph1"); + let graph2_path = tmp_work_dir.path().join("test_graph2"); + graph1.encode(&graph1_path).unwrap(); + graph2.encode(&graph2_path).unwrap(); + + // Record modification times before any operations + let graph1_metadata = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata = fs::metadata(&graph2_path).unwrap(); + let graph1_original_time = graph1_metadata.modified().unwrap(); + let graph2_original_time = graph2_metadata.modified().unwrap(); + + // Create cache with time to idle 3 seconds to force eviction + let configs = AppConfigBuilder::new() + .with_cache_capacity(10) + .with_cache_tti_seconds(3) + .build(); + + let data = Data::new(tmp_work_dir.path(), &configs, Default::default()); + + // Load first graph + let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); + assert!( + !loaded_graph1.is_dirty(), + "Graph1 should not be dirty when loaded from disk" + ); + + // Modify graph1 to make it dirty + loaded_graph1.set_dirty(true); + assert!( + loaded_graph1.is_dirty(), + "Graph1 should be dirty after modification" + ); + + // Load second graph + println!("Loading second graph"); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); + assert!( + !loaded_graph2.is_dirty(), + "Graph2 should not be dirty when loaded from disk" + ); + + // Sleep to trigger eviction + sleep(Duration::from_secs(3)).await; + data.cache.run_pending_tasks().await; + + // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! 
+ if loaded_graph1.graph.disk_storage_path().is_some() { + // Check modification times after eviction + let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); + let graph1_modified_time = graph1_metadata_after.modified().unwrap(); + let graph2_modified_time = graph2_metadata_after.modified().unwrap(); + + // Graph1 (dirty) modification time should be different + assert_ne!( + graph1_original_time, graph1_modified_time, + "Graph1 (dirty) should have been written to disk on eviction" + ); + + // Graph2 (not dirty) modification time should be the same + assert_eq!( + graph2_original_time, graph2_modified_time, + "Graph2 (not dirty) should not have been written to disk on eviction" + ); + } + } } diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index fb47251b53..663adbcd23 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -1,51 +1,67 @@ -use crate::paths::ExistingGraphFolder; -use once_cell::sync::OnceCell; +use crate::{ + paths::{ExistingGraphFolder, ValidGraphPaths}, + rayon::blocking_compute, +}; use raphtory::{ core::entities::nodes::node_ref::AsNodeRef, db::{ - api::view::{ - internal::{ - InheritEdgeHistoryFilter, InheritNodeHistoryFilter, InheritStorageOps, Static, + api::{ + storage::storage::Config, + view::{ + internal::{ + InheritEdgeHistoryFilter, InheritNodeHistoryFilter, InheritStorageOps, Static, + }, + Base, InheritViewOps, MaterializedGraph, }, - Base, InheritViewOps, MaterializedGraph, }, graph::{edge::EdgeView, node::NodeView}, }, errors::{GraphError, GraphResult}, - prelude::{CacheOps, EdgeViewOps, IndexMutationOps}, - serialise::GraphFolder, - storage::core_ops::CoreGraphOps, - vectors::{ - cache::VectorCache, storage::LazyDiskVectorCache, vectorised_graph::VectorisedGraph, - }, + prelude::{EdgeViewOps, StableDecode}, + serialise::GraphPaths, + vectors::{storage::LazyDiskVectorCache, vectorised_graph::VectorisedGraph}, }; use raphtory_storage::{ - core_ops::InheritCoreGraphOps, graph::graph::GraphStorage, layer_ops::InheritLayerOps, - mutation::InheritMutationOps, + core_ops::InheritCoreGraphOps, layer_ops::InheritLayerOps, mutation::InheritMutationOps, +}; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, }; -#[cfg(feature = "storage")] -use {raphtory::prelude::IntoGraph, raphtory_storage::disk::DiskGraphStorage}; +#[cfg(feature = "search")] +use raphtory::prelude::IndexMutationOps; #[derive(Clone)] pub struct GraphWithVectors { pub graph: MaterializedGraph, pub vectors: Option>, - pub(crate) folder: OnceCell, + pub(crate) folder: ExistingGraphFolder, + pub(crate) is_dirty: Arc, } impl GraphWithVectors { pub(crate) fn new( graph: MaterializedGraph, vectors: Option>, + folder: ExistingGraphFolder, ) -> Self { Self { graph, vectors, - folder: Default::default(), + folder, + is_dirty: Arc::new(AtomicBool::new(false)), } } + pub(crate) fn set_dirty(&self, is_dirty: bool) { + self.is_dirty.store(is_dirty, Ordering::SeqCst); + } + + pub(crate) fn is_dirty(&self) -> bool { + self.is_dirty.load(Ordering::SeqCst) + } + /// Generates and stores embeddings for a batch of nodes. 
pub(crate) async fn update_node_embeddings( &self, @@ -70,57 +86,45 @@ impl GraphWithVectors { Ok(()) } - pub(crate) fn write_updates(&self) -> Result<(), GraphError> { - match self.graph.core_graph() { - GraphStorage::Mem(_) | GraphStorage::Unlocked(_) => self.graph.write_updates(), - #[cfg(feature = "storage")] - GraphStorage::Disk(_) => Ok(()), - } - } - pub(crate) async fn read_from_folder( folder: &ExistingGraphFolder, cache: &LazyDiskVectorCache, create_index: bool, + config: Config, ) -> Result { - let graph_path = &folder.get_graph_path(); - let graph = if graph_path.is_dir() { - get_disk_graph_from_path(folder)? + let folder_clone = folder.clone(); + let graph_folder = folder.graph_folder(); + let graph = if graph_folder.read_metadata()?.is_diskgraph { + blocking_compute(move || { + MaterializedGraph::load_with_config(folder_clone.graph_folder(), config) + }) + .await? } else { - MaterializedGraph::load_cached(folder.clone())? + blocking_compute(move || { + MaterializedGraph::decode_with_config(folder_clone.graph_folder(), config) + }) + .await? }; let vectors = - VectorisedGraph::read_from_path(&folder.get_vectors_path(), graph.clone(), cache) + VectorisedGraph::read_from_path(&folder.vectors_path()?, graph.clone(), cache) .await .ok(); - println!("Graph loaded = {}", folder.get_original_path_str()); + println!("Graph loaded = {}", folder.local_path()); + #[cfg(feature = "search")] if create_index { graph.create_index()?; - graph.write_updates()?; } + Ok(Self { graph: graph.clone(), vectors, - folder: OnceCell::with_value(folder.clone().into()), + folder: folder.clone().into(), + is_dirty: Arc::new(AtomicBool::new(false)), }) } } -#[cfg(feature = "storage")] -fn get_disk_graph_from_path(path: &ExistingGraphFolder) -> Result { - let disk_graph = DiskGraphStorage::load_from_dir(&path.get_graph_path()) - .map_err(|e| GraphError::LoadFailure(e.to_string()))?; - let graph: MaterializedGraph = disk_graph.into_graph().into(); // TODO: We currently have no way to identify disk graphs as MaterializedGraphs - println!("Disk Graph loaded = {}", path.get_original_path().display()); - Ok(graph) -} - -#[cfg(not(feature = "storage"))] -fn get_disk_graph_from_path(path: &ExistingGraphFolder) -> Result { - Err(GraphError::GraphNotFound(path.to_error_path())) -} - impl Base for GraphWithVectors { type Base = MaterializedGraph; #[inline] diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index acc6b85b44..d8ac617035 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1,5 +1,15 @@ -pub use crate::server::GraphServer; +pub use crate::{ + auth::{require_jwt_write_access_dynamic, Access}, + model::graph::filtering::GraphAccessFilter, + server::GraphServer, +}; +use crate::{data::InsertionError, paths::PathValidationError}; +use raphtory::errors::GraphError; +use std::sync::Arc; + mod auth; +pub mod auth_policy; +pub mod client; pub mod data; mod graph; pub mod model; @@ -15,26 +25,48 @@ pub mod config; pub mod python; pub mod rayon; +#[derive(thiserror::Error, Debug)] +pub enum GQLError { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + Validation(#[from] PathValidationError), + #[error("Insertion failed for Graph {graph}: {error}")] + Insertion { + graph: String, + error: InsertionError, + }, + #[error(transparent)] + Arc(#[from] Arc), +} + #[cfg(test)] mod graphql_test { + #[cfg(feature = "search")] + use crate::config::app_config::AppConfigBuilder; use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, + 
auth::Access, + config::app_config::AppConfig, data::{data_tests::save_graphs_to_work_dir, Data}, model::App, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; - use arrow_array::types::UInt8Type; use async_graphql::{dynamic::Schema, UploadValue}; use dynamic_graphql::{Request, Variables}; + use itertools::Itertools; use raphtory::{ db::{ - api::view::{IntoDynamic, MaterializedGraph}, + api::{ + storage::storage::Config, + view::{IntoDynamic, MaterializedGraph}, + }, graph::views::deletion_graph::PersistentGraph, }, prelude::*, serialise::GraphFolder, + test_utils::json_sort_by_name, }; - use raphtory_api::core::storage::arc_str::ArcStr; + use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; use serde_json::{json, Value}; use std::{ collections::{HashMap, HashSet}, @@ -42,6 +74,28 @@ mod graphql_test { }; use tempfile::tempdir; + #[tokio::test] + async fn test_copy_graph() { + let graph = Graph::new(); + graph.add_node(1, "test", NO_PROPS, None, None).unwrap(); + let tmp_dir = tempdir().unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + let namespace = tmp_dir.path().join("test"); + fs::create_dir(&namespace).unwrap(); + graph.encode(namespace.join("g3")).unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); + let query = r#"mutation { + copyGraph( + path: "test/g3", + newPath: "test/g4", + ) + }"#; + + let req = Request::new(query).data(Access::Rw); + let res = schema.execute(req).await; + assert_eq!(res.errors, []); + } + #[tokio::test] #[cfg(feature = "search")] async fn test_search_nodes_gql() { @@ -76,7 +130,7 @@ mod graphql_test { ]; for (id, name, props) in nodes { - graph.add_node(id, name, props, None).unwrap(); + graph.add_node(id, name, props, None, None).unwrap(); } let metadata = vec![ @@ -99,12 +153,9 @@ mod graphql_test { let graphs = HashMap::from([("master".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); - let config = AppConfigBuilder::new().with_create_index(true).build(); - let data = Data::new(tmp_dir.path(), &config); + let data = Data::new(tmp_dir.path(), &config, Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -199,18 +250,15 @@ mod graphql_test { async fn basic_query() { let graph = PersistentGraph::new(); graph - .add_node(0, 11, NO_PROPS, None) + .add_node(0, 11, NO_PROPS, None, None) .expect("Could not add node!"); graph.add_metadata([("name", "lotr")]).unwrap(); let graph: MaterializedGraph = graph.into(); let graphs = HashMap::from([("lotr".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); - - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -236,7 +284,7 @@ mod graphql_test { "nodes": { "list": [ { - "id": "11" + "id": 11 } ] } @@ -257,6 +305,7 @@ mod graphql_test { ("cost", Prop::F32(99.5)), ], Some("a"), + None, ) .unwrap(); graph @@ -268,6 +317,7 @@ mod graphql_test { ("cost", Prop::F32(10.0)), ], Some("a"), + None, ) .unwrap(); graph @@ -279,6 +329,7 @@ mod graphql_test { ("cost", Prop::F32(76.0)), ], Some("a"), + None, ) .unwrap(); graph @@ -319,11 
+370,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -413,22 +462,15 @@ mod graphql_test { async fn query_nodefilter() { let graph = Graph::new(); graph - .add_node( - 0, - 1, - [("pgraph", Prop::from_arr::(vec![3u8]))], - None, - ) + .add_node(0, 1, [("pgraph", Prop::I32(0))], None, None) .unwrap(); let graph: MaterializedGraph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -484,138 +526,79 @@ mod graphql_test { g.add_edge(9, 1, 2, [("state", true)], None).unwrap(); g.add_edge(10, 1, 2, [("state", false)], None).unwrap(); g.add_edge(6, 1, 2, NO_PROPS, None).unwrap(); - g.add_node(11, 3, [("name", "phone")], None).unwrap(); - g.add_node(12, 3, [("name", "fax")], None).unwrap(); - g.add_node(13, 3, [("name", "fax")], None).unwrap(); + g.add_node(11, 3, [("name", "phone")], None, None).unwrap(); + g.add_node(12, 3, [("name", "fax")], None, None).unwrap(); + g.add_node(13, 3, [("name", "fax")], None, None).unwrap(); let graph: MaterializedGraph = g.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let expected = json!({ - "graph": { - "properties": { - "temporal": { - "values": [ - { - "unique": [ - "xyz", - "abc" - ] - } - ] - } - }, - "node": { - "properties": { - "temporal": { - "values": [ - { - "unique": [ - "fax", - "phone" - ] - } - ] - } + let schema = App::create_schema().data(data).finish().unwrap(); + + // Query each `unique` by key so we can assert the typed element shape + // (strings for string props, bools for bool props — not stringified). 
+ let query = r#" + { + graph(path: "graph") { + properties { + temporal { + get(key: "state") { unique } + } + } + node(name: "3") { + properties { + temporal { + get(key: "name") { unique } } - }, - "edge": { - "properties": { - "temporal": { - "values": [ - { - "unique": [ - "open", - "review", - "in-progress" - ] - }, - { - "unique": [ - "false", - "true" - ] - } - ] - } + } + } + edge(src: "1", dst: "2") { + properties { + temporal { + status: get(key: "status") { unique } + state: get(key: "state") { unique } } } } - }); + } + } + "#; - let mut actual_graph_props = HashSet::new(); - let mut actual_node_props = HashSet::new(); - let mut actual_edge_props = HashSet::new(); + let req = Request::new(query); + let res = schema.execute(req).await; + assert!(res.errors.is_empty(), "errors: {:?}", res.errors); + let data = res.data.into_json().unwrap(); - let graph_props = &expected["graph"]["properties"]["temporal"]["values"]; - for value in graph_props.as_array().unwrap().iter() { - let unique_values: HashSet<_> = value["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap()) - .collect(); - actual_graph_props.extend(unique_values); + fn sorted_unique<'a>(v: &'a Value) -> Vec<&'a Value> { + let mut out: Vec<&Value> = v["unique"].as_array().unwrap().iter().collect(); + // serde_json::Value has a deterministic total order for same-typed values + // and groups by type for mixed inputs — fine for this test. + out.sort_by(|a, b| a.to_string().cmp(&b.to_string())); + out } - let node_props = &expected["graph"]["node"]["properties"]["temporal"]["values"]; - for value in node_props.as_array().unwrap().iter() { - let unique_values: HashSet<_> = value["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap()) - .collect(); - actual_node_props.extend(unique_values); - } + // graph-level `state` is a string property + let state = sorted_unique(&data["graph"]["properties"]["temporal"]["get"]); + assert_eq!(state, vec![&json!("abc"), &json!("xyz")]); - let edge_props = &expected["graph"]["edge"]["properties"]["temporal"]["values"]; - for value in edge_props.as_array().unwrap().iter() { - let unique_values: HashSet<_> = value["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap()) - .collect(); - actual_edge_props.extend(unique_values); - } + // node-level `name` is a string property + let name = sorted_unique(&data["graph"]["node"]["properties"]["temporal"]["get"]); + assert_eq!(name, vec![&json!("fax"), &json!("phone")]); + // edge-level `status` is a string property + let status = sorted_unique(&data["graph"]["edge"]["properties"]["temporal"]["status"]); assert_eq!( - actual_graph_props, - expected["graph"]["properties"]["temporal"]["values"][0]["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap()) - .collect::>() - ); - assert_eq!( - actual_node_props, - expected["graph"]["node"]["properties"]["temporal"]["values"][0]["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap()) - .collect::>() - ); - assert_eq!( - actual_edge_props, - expected["graph"]["edge"]["properties"]["temporal"]["values"] - .as_array() - .unwrap() - .iter() - .map(|value| value["unique"] - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap())) - .flatten() - .collect::>() + status, + vec![&json!("in-progress"), &json!("open"), &json!("review")] ); + + // edge-level `state` is a bool property — must come back as JSON bools, + // not strings "true" / "false". 
+ let edge_state = sorted_unique(&data["graph"]["edge"]["properties"]["temporal"]["state"]); + assert_eq!(edge_state, vec![&json!(false), &json!(true)]); } #[tokio::test] @@ -637,18 +620,16 @@ mod graphql_test { g.add_edge(9, 1, 2, [("state", true)], None).unwrap(); g.add_edge(10, 1, 2, [("state", false)], None).unwrap(); g.add_edge(6, 1, 2, NO_PROPS, None).unwrap(); - g.add_node(11, 3, [("name", "phone")], None).unwrap(); - g.add_node(12, 3, [("name", "fax")], None).unwrap(); - g.add_node(13, 3, [("name", "fax")], None).unwrap(); + g.add_node(11, 3, [("name", "phone")], None, None).unwrap(); + g.add_node(12, 3, [("name", "fax")], None, None).unwrap(); + g.add_node(13, 3, [("name", "fax")], None, None).unwrap(); let g = g.into(); let graphs = HashMap::from([("graph".to_string(), g)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" @@ -962,22 +943,15 @@ mod graphql_test { async fn query_properties() { let graph = Graph::new(); graph - .add_node( - 0, - 1, - [("pgraph", Prop::from_arr::(vec![3u8]))], - None, - ) + .add_node(0, 1, [("pgraph", Prop::I32(0))], None, None) .unwrap(); let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -1017,7 +991,7 @@ mod graphql_test { #[tokio::test] async fn test_graph_injection() { let g = PersistentGraph::new(); - g.add_node(0, 1, NO_PROPS, None).unwrap(); + g.add_node(0, 1, NO_PROPS, None, None).unwrap(); let tmp_dir = tempfile::TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); g.encode(GraphFolder::new_as_zip(&zip_path)).unwrap(); @@ -1029,7 +1003,7 @@ mod graphql_test { }; let tmp_dir = tempdir().unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); let schema = App::create_schema().data(data).finish().unwrap(); let query = r##" @@ -1039,7 +1013,9 @@ mod graphql_test { "##; let variables = json!({ "file": null, "overwrite": false }); - let mut req = Request::new(query).variables(Variables::from_json(variables)); + let mut req = Request::new(query) + .variables(Variables::from_json(variables)) + .data(Access::Rw); req.set_upload("variables.file", upload_val); let res = schema.execute(req).await; assert_eq!(res.errors, vec![]); @@ -1060,23 +1036,20 @@ mod graphql_test { let req = Request::new(list_nodes); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); - assert_eq!( - res_json, - json!({"graph": {"nodes": {"list": [{"id": "1"}]}}}) - ); + assert_eq!(res_json, json!({"graph": {"nodes": {"list": [{"id": 1}]}}})); } #[tokio::test] async fn test_graph_send_receive_base64() { let g = PersistentGraph::new(); - g.add_node(0, 1, NO_PROPS, None).unwrap(); + 
g.add_node(0, 1, NO_PROPS, None, None).unwrap(); let graph_str = url_encode_graph(g.clone()).unwrap(); let tmp_dir = tempdir().unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); let schema = App::create_schema().data(data).finish().unwrap(); let query = r#" @@ -1084,12 +1057,14 @@ mod graphql_test { sendGraph(path: "test", graph: $graph, overwrite: $overwrite) } "#; - let req = Request::new(query).variables(Variables::from_json( - json!({ "graph": graph_str, "overwrite": false }), - )); + let req = Request::new(query) + .variables(Variables::from_json( + json!({ "graph": graph_str, "overwrite": false }), + )) + .data(Access::Rw); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); assert_eq!(res_json, json!({"sendGraph": "test"})); @@ -1109,10 +1084,7 @@ mod graphql_test { let res = schema.execute(req).await; assert_eq!(res.errors.len(), 0); let res_json = res.data.into_json().unwrap(); - assert_eq!( - res_json, - json!({"graph": {"nodes": {"list": [{"id": "1"}]}}}) - ); + assert_eq!(res_json, json!({"graph": {"nodes": {"list": [{"id": 1}]}}})); let receive_graph = r#" query { @@ -1125,7 +1097,11 @@ mod graphql_test { assert_eq!(res.errors.len(), 0); let res_json = res.data.into_json().unwrap(); let graph_encoded = res_json.get("receiveGraph").unwrap().as_str().unwrap(); - let graph_roundtrip = url_decode_graph(graph_encoded).unwrap().into_dynamic(); + let temp_dir = tempdir().unwrap(); + let graph_roundtrip = + url_decode_graph_at(graph_encoded, temp_dir.path(), Config::default()) + .unwrap() + .into_dynamic(); assert_eq!(g, graph_roundtrip); } @@ -1133,12 +1109,12 @@ mod graphql_test { async fn test_type_filter() { let graph = Graph::new(); graph.add_metadata([("name", "graph")]).unwrap(); - graph.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 2, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 3, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 4, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 5, NO_PROPS, Some("c")).unwrap(); - graph.add_node(1, 6, NO_PROPS, Some("e")).unwrap(); + graph.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); + graph.add_node(1, 2, NO_PROPS, Some("b"), None).unwrap(); + graph.add_node(1, 3, NO_PROPS, Some("b"), None).unwrap(); + graph.add_node(1, 4, NO_PROPS, Some("a"), None).unwrap(); + graph.add_node(1, 5, NO_PROPS, Some("c"), None).unwrap(); + graph.add_node(1, 6, NO_PROPS, Some("e"), None).unwrap(); graph.add_edge(2, 1, 2, NO_PROPS, Some("a")).unwrap(); graph.add_edge(2, 3, 2, NO_PROPS, Some("a")).unwrap(); graph.add_edge(2, 2, 4, NO_PROPS, Some("a")).unwrap(); @@ -1150,11 +1126,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1173,7 +1147,7 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; - let data = res.data.into_json().unwrap(); + let data = json_sort_by_name(res.data.into_json().unwrap()); assert_eq!( data, json!({ @@ -1200,6 +1174,7 @@ mod 
graphql_test { nodes { typeFilter(nodeTypes: ["a"]) { list { + name neighbours { list { name @@ -1214,7 +1189,7 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; - let data = res.data.into_json().unwrap(); + let data = json_sort_by_name(res.data.into_json().unwrap()); assert_eq!( data, json!({ @@ -1223,7 +1198,8 @@ mod graphql_test { "typeFilter": { "list": [ { - "neighbours": { + "name": "1", + "neighbours": { "list": [ { "name": "2" @@ -1232,7 +1208,8 @@ mod graphql_test { } }, { - "neighbours": { + "name": "4", + "neighbours": { "list": [ { "name": "2" @@ -1255,12 +1232,12 @@ mod graphql_test { async fn test_paging() { let graph1 = Graph::new(); graph1.add_metadata([("name", "graph1")]).unwrap(); - graph1.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); - graph1.add_node(1, 2, NO_PROPS, Some("b")).unwrap(); - graph1.add_node(1, 3, NO_PROPS, Some("b")).unwrap(); - graph1.add_node(1, 4, NO_PROPS, Some("a")).unwrap(); - graph1.add_node(1, 5, NO_PROPS, Some("c")).unwrap(); - graph1.add_node(1, 6, NO_PROPS, Some("e")).unwrap(); + graph1.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); + graph1.add_node(1, 2, NO_PROPS, Some("b"), None).unwrap(); + graph1.add_node(1, 3, NO_PROPS, Some("b"), None).unwrap(); + graph1.add_node(1, 4, NO_PROPS, Some("a"), None).unwrap(); + graph1.add_node(1, 5, NO_PROPS, Some("c"), None).unwrap(); + graph1.add_node(1, 6, NO_PROPS, Some("e"), None).unwrap(); graph1.add_edge(2, 1, 2, NO_PROPS, Some("a")).unwrap(); graph1.add_edge(2, 3, 2, NO_PROPS, Some("a")).unwrap(); graph1.add_edge(2, 2, 4, NO_PROPS, Some("a")).unwrap(); @@ -1268,21 +1245,50 @@ mod graphql_test { graph1.add_edge(2, 4, 6, NO_PROPS, Some("a")).unwrap(); graph1.add_edge(2, 5, 6, NO_PROPS, Some("a")).unwrap(); graph1.add_edge(2, 3, 6, NO_PROPS, Some("a")).unwrap(); + + let all_nodes: Vec<_> = graph1.nodes().name().into_iter_values().collect(); + + // make sure we have the correct nodes + assert_eq!( + all_nodes.iter().sorted().collect_vec(), + ["1", "2", "3", "4", "5", "6"] + ); + let all_edges: Vec<_> = graph1 + .edges() + .id() + .map(|(src, dst)| { + let src = match src { + GID::U64(u) => u, + GID::Str(_) => unreachable!("integer-indexed graph"), + }; + let dst = match dst { + GID::U64(u) => u, + GID::Str(_) => unreachable!("integer-indexed graph"), + }; + (src, dst) + }) + .collect(); + + // make sure we have the correct edges + assert_eq!( + all_edges.iter().cloned().sorted().collect_vec(), + [(1, 2), (2, 4), (3, 2), (3, 6), (4, 5), (4, 6), (5, 6),] + ); let graph2 = Graph::new(); graph2.add_metadata([("name", "graph2")]).unwrap(); - graph2.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + graph2.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); let graph3 = Graph::new(); graph3.add_metadata([("name", "graph3")]).unwrap(); - graph3.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + graph3.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); let graph4 = Graph::new(); graph4.add_metadata([("name", "graph4")]).unwrap(); - graph4.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + graph4.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); let graph5 = Graph::new(); graph5.add_metadata([("name", "graph5")]).unwrap(); - graph5.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + graph5.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); let graph6 = Graph::new(); graph6.add_metadata([("name", "graph6")]).unwrap(); - graph6.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); + graph6.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); let graphs = HashMap::from([ 
("graph1".to_string(), graph1.into()), @@ -1293,11 +1299,8 @@ mod graphql_test { ("graph6".to_string(), graph6.into()), ]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); - - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1315,22 +1318,16 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; let data = res.data.into_json().unwrap(); + let expected_page: Vec<_> = all_nodes[1..4] + .iter() + .map(|node| json!({"name": node})) + .collect(); assert_eq!( data, json!({ "graph": { "nodes": { - "page": [ - { - "name": "2" - }, - { - "name": "3" - }, - { - "name": "4" - } - ] + "page": expected_page } } }), @@ -1377,19 +1374,16 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; let data = res.data.into_json().unwrap(); + let expected_page: Vec<_> = all_nodes[2..4] + .iter() + .map(|node| json!({"name": node})) + .collect(); assert_eq!( data, json!({ "graph": { "nodes": { - "page": [ - { - "name": "3" - }, - { - "name": "4" - } - ] + "page": expected_page } } }), @@ -1410,19 +1404,16 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; let data = res.data.into_json().unwrap(); + let expected_page: Vec<_> = all_edges[5..7] + .iter() + .map(|edge| json!({"id": edge})) + .collect(); assert_eq!( data, json!({ "graph": { "edges": { - "page": [ - { - "id": ["5", "6"] - }, - { - "id": ["3", "6"] - } - ] + "page": expected_page } } }), @@ -1443,16 +1434,16 @@ mod graphql_test { let req = Request::new(req); let res = schema.execute(req).await; let data = res.data.into_json().unwrap(); + let expected_page: Vec<_> = all_edges[6..] 
+ .iter() + .map(|edge| json!({"id": edge})) + .collect(); assert_eq!( data, json!({ "graph": { "edges": { - "page": [ - { - "id": ["3", "6"] - }, - ] + "page": expected_page } } }), @@ -1495,92 +1486,16 @@ mod graphql_test { ); } - #[cfg(feature = "storage")] - #[tokio::test] - async fn test_disk_graph() { - let graph = Graph::new(); - graph.add_metadata([("name", "graph")]).unwrap(); - graph.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 2, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 3, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 4, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 5, NO_PROPS, Some("c")).unwrap(); - graph.add_node(1, 6, NO_PROPS, Some("e")).unwrap(); - graph.add_edge(22, 1, 2, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 3, 2, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 2, 4, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 4, 5, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 4, 5, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 5, 6, NO_PROPS, Some("a")).unwrap(); - graph.add_edge(22, 3, 6, NO_PROPS, Some("a")).unwrap(); - - let tmp_work_dir = tempdir().unwrap(); - let tmp_work_dir = tmp_work_dir.path(); - - let disk_graph_path = tmp_work_dir.join("graph"); - fs::create_dir(&disk_graph_path).unwrap(); - fs::File::create(disk_graph_path.join(".raph")).unwrap(); - let _ = DiskGraphStorage::from_graph(&graph, disk_graph_path.join("graph")).unwrap(); - - let data = Data::new(&tmp_work_dir, &AppConfig::default()); - let schema = App::create_schema().data(data).finish().unwrap(); - - let req = r#" - { - graph(path: "graph") { - nodes { - list { - name - } - } - } - } - "#; - - let req = Request::new(req); - let res = schema.execute(req).await; - let data = res.data.into_json().unwrap(); - assert_eq!( - data, - json!({ - "graph": { - "nodes": { - "list": [ - { - "name": "1" - }, - { - "name": "2" - }, - { - "name": "3" - }, - { - "name": "4" - }, - { - "name": "5" - }, - { - "name": "6" - } - ] - } - } - }), - ); - } - #[tokio::test] async fn test_query_namespace() { let graph = Graph::new(); graph.add_metadata([("name", "graph")]).unwrap(); - graph.add_node(1, 1, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 2, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 3, NO_PROPS, Some("b")).unwrap(); - graph.add_node(1, 4, NO_PROPS, Some("a")).unwrap(); - graph.add_node(1, 5, NO_PROPS, Some("c")).unwrap(); - graph.add_node(1, 6, NO_PROPS, Some("e")).unwrap(); + graph.add_node(1, 1, NO_PROPS, Some("a"), None).unwrap(); + graph.add_node(1, 2, NO_PROPS, Some("b"), None).unwrap(); + graph.add_node(1, 3, NO_PROPS, Some("b"), None).unwrap(); + graph.add_node(1, 4, NO_PROPS, Some("a"), None).unwrap(); + graph.add_node(1, 5, NO_PROPS, Some("c"), None).unwrap(); + graph.add_node(1, 6, NO_PROPS, Some("e"), None).unwrap(); graph.add_edge(2, 1, 2, NO_PROPS, Some("a")).unwrap(); graph.add_edge(2, 3, 2, NO_PROPS, Some("a")).unwrap(); graph.add_edge(2, 2, 4, NO_PROPS, Some("a")).unwrap(); @@ -1592,11 +1507,8 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs) - .await - .unwrap(); - - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1661,7 +1573,7 @@ mod 
graphql_test {
             createSubgraph(parentPath: "graph", newPath: "graph2", nodes: ["1", "2"], overwrite: false)
         }
         "#;
-        let req = Request::new(req);
+        let req = Request::new(req).data(Access::Rw);
         let res = schema.execute(req).await;
         assert_eq!(res.errors, vec![]);
         let req = r#"
@@ -1669,7 +1581,7 @@ mod graphql_test {
             createSubgraph(parentPath: "graph", newPath: "namespace1/graph3", nodes: ["2", "3", "4"], overwrite: false)
         }
         "#;
-        let req = Request::new(req);
+        let req = Request::new(req).data(Access::Rw);
         let res = schema.execute(req).await;
         assert_eq!(res.errors, vec![]);
@@ -1853,7 +1765,8 @@ mod graphql_test {
     async fn test_new_graph(schema: &Schema, path: &str, should_work: bool) {
         let req = Request::new(format!(
             r#"mutation {{ newGraph(path: "{path}", graphType: EVENT) }}"#,
-        ));
+        ))
+        .data(Access::Rw);
         let res = schema.execute(req).await;
         if should_work {
@@ -1908,7 +1821,7 @@ mod graphql_test {
     #[tokio::test]
     async fn test_new_graph_rejects_hidden_path_components() {
         let tmp_dir = tempdir().unwrap();
-        let data = Data::new(tmp_dir.path(), &AppConfig::default());
+        let data = Data::new(tmp_dir.path(), &AppConfig::default(), Config::default());
         let schema = App::create_schema().data(data).finish().unwrap();
         // Valid paths
diff --git a/raphtory-graphql/src/model/graph/collection.rs b/raphtory-graphql/src/model/graph/collection.rs
index aa345e984a..f2b5b546d8 100644
--- a/raphtory-graphql/src/model/graph/collection.rs
+++ b/raphtory-graphql/src/model/graph/collection.rs
@@ -1,10 +1,42 @@
-use crate::rayon::blocking_compute;
+use crate::{config::concurrency_config::ConcurrencyConfig, rayon::blocking_compute};
+use async_graphql::{Context, Error, Result};
 use dynamic_graphql::{
     internal::{OutputTypeName, ResolveOwned, TypeName},
     ResolvedObject, ResolvedObjectFields,
 };
 use std::{borrow::Cow, sync::Arc};

+/// Returns an error when `concurrency.disable_lists` is set. Called from every `list`
+/// resolver on paginated collections.
+pub(crate) fn check_list_allowed(ctx: &Context<'_>) -> Result<()> {
+    if ctx
+        .data_opt::<ConcurrencyConfig>()
+        .map(|cfg| cfg.disable_lists)
+        .unwrap_or(false)
+    {
+        return Err(Error::new(
+            "Bulk list endpoints are disabled on this server. Use `page` instead.",
+        ));
+    }
+    Ok(())
+}
+
+/// Returns an error when `limit` exceeds `concurrency.max_page_size`. Called from every
+/// `page` resolver on paginated collections.
+pub(crate) fn check_page_limit(ctx: &Context<'_>, limit: usize) -> Result<()> {
+    if let Some(max) = ctx
+        .data_opt::<ConcurrencyConfig>()
+        .and_then(|cfg| cfg.max_page_size)
+    {
+        if limit > max {
+            return Err(Error::new(format!(
+                "page limit {limit} exceeds the maximum allowed page size {max}"
+            )));
+        }
+    }
+    Ok(())
+}
+
 /// Collection of items
 #[derive(ResolvedObject, Clone)]
 #[graphql(get_type_name = true)]
@@ -47,18 +79,31 @@
 where
     T: for<'a> ResolveOwned<'a>,
 {
     /// Returns a list of collection objects.
-    async fn list(&self) -> Vec<T> {
+    async fn list(&self, ctx: &Context<'_>) -> Result<Vec<T>> {
+        check_list_allowed(ctx)?;
         let self_clone = self.clone();
-        blocking_compute(move || self_clone.items.to_vec()).await
+        Ok(blocking_compute(move || self_clone.items.to_vec()).await)
     }

     /// Fetch one page with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0).
     ///
     /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 7 items (1 page of 5 + 2),
     /// will be returned.
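+    ///
+    /// A worked sketch of the paging arithmetic (hypothetical collection of 20 items):
+    ///
+    /// ```text
+    /// page(limit: 5, offset: 2, pageIndex: 1)
+    /// start = pageIndex * limit + offset = 1 * 5 + 2 = 7
+    /// => returns items 7..12
+    /// ```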
-    async fn page(&self, limit: usize, offset: Option<usize>, page_index: Option<usize>) -> Vec<T> {
+
+    async fn page(
+        &self,
+        ctx: &Context<'_>,
+        #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize,
+        #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")]
+        offset: Option<usize>,
+        #[graphql(
+            desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)."
+        )]
+        page_index: Option<usize>,
+    ) -> Result<Vec<T>> {
+        check_page_limit(ctx, limit)?;
         let self_clone = self.clone();
-        blocking_compute(move || {
+        Ok(blocking_compute(move || {
             let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0);
             self_clone
                 .items
@@ -68,7 +113,7 @@
                 .cloned()
                 .collect()
         })
-        .await
+        .await)
     }

     /// Returns a count of collection objects.
diff --git a/raphtory-graphql/src/model/graph/edge.rs b/raphtory-graphql/src/model/graph/edge.rs
index db1d8b02f6..29fd37c6d4 100644
--- a/raphtory-graphql/src/model/graph/edge.rs
+++ b/raphtory-graphql/src/model/graph/edge.rs
@@ -4,6 +4,7 @@ use crate::{
         filtering::{EdgeViewCollection, GqlEdgeFilter},
         history::GqlHistory,
         node::GqlNode,
+        node_id::GqlNodeId,
         property::{GqlMetadata, GqlProperties},
         timeindex::{GqlEventTime, GqlTimeInput},
         windowset::GqlEdgeWindowSet,
@@ -61,7 +62,11 @@ impl GqlEdge {
     /// Returns a view of Edge containing all layers in the list of names.
     ///
     /// Errors if any of the layers do not exist.
-    async fn layers(&self, names: Vec<String>) -> GqlEdge {
+
+    async fn layers(
+        &self,
+        #[graphql(desc = "Layer names to include.")] names: Vec<String>,
+    ) -> GqlEdge {
         let self_clone = self.clone();
         blocking_compute(move || self_clone.ee.valid_layers(names).into()).await
     }
@@ -69,7 +74,11 @@ impl GqlEdge {
     /// Returns a view of Edge containing all layers except the excluded list of names.
     ///
     /// Errors if any of the layers do not exist.
-    async fn exclude_layers(&self, names: Vec<String>) -> GqlEdge {
+
+    async fn exclude_layers(
+        &self,
+        #[graphql(desc = "Layer names to exclude.")] names: Vec<String>,
+    ) -> GqlEdge {
         let self_clone = self.clone();
         blocking_compute(move || self_clone.ee.exclude_valid_layers(names).into()).await
     }
@@ -77,14 +86,19 @@ impl GqlEdge {
     /// Returns a view of Edge containing the specified layer.
     ///
     /// Errors if any of the layers do not exist.
-    async fn layer(&self, name: String) -> GqlEdge {
+
+    async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> GqlEdge {
         self.ee.valid_layers(name).into()
     }

     /// Returns a view of Edge containing all layers except the excluded layer specified.
     ///
     /// Errors if any of the layers do not exist.
-    async fn exclude_layer(&self, name: String) -> GqlEdge {
+
+    async fn exclude_layer(
+        &self,
+        #[graphql(desc = "Layer name to exclude.")] name: String,
+    ) -> GqlEdge {
         self.ee.exclude_valid_layers(name).into()
     }
@@ -97,10 +111,20 @@ impl GqlEdge {
     /// e.g. "1 month and 1 day" will align at the start of the day.
     /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before
     /// the start of the first window and/or after the end of the last window (i.e. not included in any window).
+
     async fn rolling(
         &self,
+        #[graphql(
+            desc = "Width of each window. Pass either `{epoch: <number>}` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: <interval>}` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)."
+        )]
         window: WindowDuration,
+        #[graphql(
+            desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: <number>}` or `{duration: <interval>}` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap."
+        )]
         step: Option<WindowDuration>,
+        #[graphql(
+            desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)."
+        )]
         alignment_unit: Option,
     ) -> Result<GqlEdgeWindowSet> {
         let window = window.try_into_interval()?;
@@ -120,9 +144,16 @@ impl GqlEdge {
     /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment.
     /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step.
     /// e.g. "1 month and 1 day" will align at the start of the day.
+
     async fn expanding(
         &self,
+        #[graphql(
+            desc = "How much the window grows by on each step. Pass either `{epoch: <number>}` for a discrete number of milliseconds, or `{duration: <interval>}` for a calendar duration (e.g. `{duration: 1 day}`)."
+        )]
         step: WindowDuration,
+        #[graphql(
+            desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`."
+        )]
         alignment_unit: Option,
     ) -> Result<GqlEdgeWindowSet> {
         let step = step.try_into_interval()?;
@@ -137,16 +168,28 @@ impl GqlEdge {
     /// Creates a view of the Edge including all events between the specified start (inclusive) and end (exclusive).
     ///
     /// For persistent graphs, any edge which exists at any point during the window will be included. You may want to restrict this to only edges that are present at the end of the window using the is_valid function.
-    async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> GqlEdge {
+
+    async fn window(
+        &self,
+        #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput,
+        #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput,
+    ) -> GqlEdge {
         self.ee.window(start.into_time(), end.into_time()).into()
     }

     /// Creates a view of the Edge including all events at a specified time.
-    async fn at(&self, time: GqlTimeInput) -> GqlEdge {
+
+    async fn at(
+        &self,
+        #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput,
+    ) -> GqlEdge {
         self.ee.at(time.into_time()).into()
     }

-    /// Returns a view of the edge at the latest time of the graph.
+    /// View of this edge pinned to the graph's latest time — equivalent to
+    /// `at(graph.latestTime)`. The edge's properties and metadata show their
+    /// most recent values, and (for persistent graphs) validity is evaluated
+    /// at that instant.
     async fn latest(&self) -> GqlEdge {
         self.ee.latest().into()
     }

     /// Creates a view of the Edge including all events that are valid at time.
     ///
     /// This is equivalent to before(time + 1) for Graph and at(time) for PersistentGraph.
-    async fn snapshot_at(&self, time: GqlTimeInput) -> GqlEdge {
+
+    async fn snapshot_at(
+        &self,
+        #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput,
+    ) -> GqlEdge {
         self.ee.snapshot_at(time.into_time()).into()
     }
@@ -166,34 +213,66 @@
     }

     /// Creates a view of the Edge including all events before a specified end (exclusive).
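+    ///
+    /// For example (a sketch with hypothetical event times): with events at 1..=10,
+    /// `before(time: 5)` keeps events 1-4, while `after(time: 5)` keeps 6-10.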
- async fn before(&self, time: GqlTimeInput) -> GqlEdge { + + async fn before( + &self, + #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput, + ) -> GqlEdge { self.ee.before(time.into_time()).into() } /// Creates a view of the Edge including all events after a specified start (exclusive). - async fn after(&self, time: GqlTimeInput) -> GqlEdge { + + async fn after( + &self, + #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput, + ) -> GqlEdge { self.ee.after(time.into_time()).into() } /// Shrinks both the start and end of the window. - async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn shrink_window( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.ee .shrink_window(start.into_time(), end.into_time()) .into() } /// Set the start of the window. - async fn shrink_start(&self, start: GqlTimeInput) -> Self { + + async fn shrink_start( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + ) -> Self { self.ee.shrink_start(start.into_time()).into() } /// Set the end of the window. - async fn shrink_end(&self, end: GqlTimeInput) -> Self { + + async fn shrink_end( + &self, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.ee.shrink_end(end.into_time()).into() } /// Takes a specified selection of views and applies them in given order. - async fn apply_views(&self, views: Vec) -> Result { + + async fn apply_views( + &self, + #[graphql( + desc = "Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result." + )] + views: Vec, + ) -> Result { let mut return_view: GqlEdge = self.ee.clone().into(); for view in views { return_view = match view { @@ -248,6 +327,10 @@ impl GqlEdge { self.ee.earliest_time().into() } + /// The timestamp of the first event in this edge's history (first update, first + /// deletion, or anything in between). Differs from `earliestTime` in that + /// `earliestTime` reports when the edge is first *valid*; `firstUpdate` reports + /// when its history actually begins. async fn first_update(&self) -> GqlEventTime { let self_clone = self.clone(); blocking_compute(move || self_clone.ee.history().earliest_time().into()).await @@ -258,6 +341,10 @@ impl GqlEdge { self.ee.latest_time().into() } + /// The timestamp of the last event in this edge's history (last update, last + /// deletion, or anything in between). Differs from `latestTime` in that + /// `latestTime` reports when the edge is last *valid*; `lastUpdate` reports + /// when its history actually ends. async fn last_update(&self) -> GqlEventTime { let self_clone = self.clone(); blocking_compute(move || self_clone.ee.history().latest_time().into()).await @@ -302,13 +389,12 @@ impl GqlEdge { self.ee.nbr().into() } - /// Returns the id of the edge. - /// - /// Returns: - /// list[str]: - async fn id(&self) -> Vec { - let (src_name, dst_name) = self.ee.id(); - vec![src_name.to_string(), dst_name.to_string()] + /// Returns the `[src, dst]` id pair of the edge. Each id is a `String` + /// for string-indexed graphs or a non-negative `Int` for integer-indexed + /// graphs. 
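+    ///
+    /// For example (sketch, hypothetical data): `[1, 2]` on an integer-indexed
+    /// graph, `["alice", "bob"]` on a string-indexed one.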
+ async fn id(&self) -> Vec { + let (src_id, dst_id) = self.ee.id(); + vec![GqlNodeId(src_id), GqlNodeId(dst_id)] } /// Returns a view of the properties of the edge. @@ -393,7 +479,14 @@ impl GqlEdge { self.ee.is_self_loop() } - async fn filter(&self, expr: GqlEdgeFilter) -> Result { + /// Apply an edge filter in place, returning an edge view whose properties / + /// metadata / history are restricted to the matching subset. + + async fn filter( + &self, + #[graphql(desc = "Composite edge filter (by property, layer, src/dst, etc.).")] + expr: GqlEdgeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeEdgeFilter = expr.try_into()?; diff --git a/raphtory-graphql/src/model/graph/edges.rs b/raphtory-graphql/src/model/graph/edges.rs index f83d21c67d..084e2d37c3 100644 --- a/raphtory-graphql/src/model/graph/edges.rs +++ b/raphtory-graphql/src/model/graph/edges.rs @@ -1,6 +1,7 @@ use crate::{ model::{ graph::{ + collection::{check_list_allowed, check_page_limit}, edge::GqlEdge, filtering::EdgesViewCollection, timeindex::{GqlEventTime, GqlTimeInput}, @@ -11,6 +12,7 @@ use crate::{ }, rayon::blocking_compute, }; +use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use itertools::Itertools; use raphtory::{ @@ -30,6 +32,9 @@ use raphtory::db::{ api::view::Filter, graph::views::filter::model::edge_filter::CompositeEdgeFilter, }; +/// A lazy collection of edges from a graph view. Supports the usual view +/// transforms (window, layer, filter, ...), plus edge-specific ones like +/// `explode` and `explodeLayers`, pagination, and sorting. #[derive(ResolvedObject, Clone)] #[graphql(name = "Edges")] pub(crate) struct GqlEdges { @@ -68,24 +73,37 @@ impl GqlEdges { } /// Returns a collection containing only edges belonging to the listed layers. - async fn layers(&self, names: Vec) -> Self { + + async fn layers( + &self, + #[graphql(desc = "Layer names to include.")] names: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.ee.valid_layers(names))).await } /// Returns a collection containing edges belonging to all layers except the excluded list of layers. - async fn exclude_layers(&self, names: Vec) -> Self { + + async fn exclude_layers( + &self, + #[graphql(desc = "Layer names to exclude.")] names: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.ee.exclude_valid_layers(names))).await } /// Returns a collection containing edges belonging to the specified layer. - async fn layer(&self, name: String) -> Self { + + async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> Self { self.update(self.ee.valid_layers(name)) } /// Returns a collection containing edges belonging to all layers except the excluded layer specified. - async fn exclude_layer(&self, name: String) -> Self { + + async fn exclude_layer( + &self, + #[graphql(desc = "Layer name to exclude.")] name: String, + ) -> Self { self.update(self.ee.exclude_valid_layers(name)) } @@ -98,10 +116,20 @@ impl GqlEdges { /// e.g. "1 month and 1 day" will align at the start of the day. /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before /// the start of the first window and/or after the end of the last window (i.e. not included in any window). + async fn rolling( &self, + #[graphql( + desc = "Width of each window. 
Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)." + )] window: WindowDuration, + #[graphql( + desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap." + )] step: Option, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)." + )] alignment_unit: Option, ) -> Result { let window = window.try_into_interval()?; @@ -121,9 +149,16 @@ impl GqlEdges { /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment. /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. /// e.g. "1 month and 1 day" will align at the start of the day. + async fn expanding( &self, + #[graphql( + desc = "How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`)." + )] step: WindowDuration, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`." + )] alignment_unit: Option, ) -> Result { let step = step.try_into_interval()?; @@ -136,15 +171,25 @@ impl GqlEdges { } /// Creates a view of the Edge including all events between the specified start (inclusive) and end (exclusive). - async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn window( + &self, + #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput, + #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput, + ) -> Self { self.update(self.ee.window(start.into_time(), end.into_time())) } /// Creates a view of the Edge including all events at a specified time. - async fn at(&self, time: GqlTimeInput) -> Self { + + async fn at( + &self, + #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput, + ) -> Self { self.update(self.ee.at(time.into_time())) } + /// View showing only the latest state of each edge (equivalent to `at(latestTime)`). async fn latest(&self) -> Self { let e = self.ee.clone(); let latest = blocking_compute(move || e.latest()).await; @@ -152,7 +197,11 @@ impl GqlEdges { } /// Creates a view of the Edge including all events that are valid at time. This is equivalent to before(time + 1) for Graph and at(time) for PersistentGraph. - async fn snapshot_at(&self, time: GqlTimeInput) -> Self { + + async fn snapshot_at( + &self, + #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput, + ) -> Self { self.update(self.ee.snapshot_at(time.into_time())) } @@ -162,32 +211,58 @@ impl GqlEdges { } /// Creates a view of the Edge including all events before a specified end (exclusive). 
- async fn before(&self, time: GqlTimeInput) -> Self { + + async fn before(&self, #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput) -> Self { self.update(self.ee.before(time.into_time())) } /// Creates a view of the Edge including all events after a specified start (exclusive). - async fn after(&self, time: GqlTimeInput) -> Self { + + async fn after(&self, #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput) -> Self { self.update(self.ee.after(time.into_time())) } /// Shrinks both the start and end of the window. - async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn shrink_window( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.ee.shrink_window(start.into_time(), end.into_time())) } /// Set the start of the window. - async fn shrink_start(&self, start: GqlTimeInput) -> Self { + + async fn shrink_start( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + ) -> Self { self.update(self.ee.shrink_start(start.into_time())) } /// Set the end of the window. - async fn shrink_end(&self, end: GqlTimeInput) -> Self { + + async fn shrink_end( + &self, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.ee.shrink_end(end.into_time())) } /// Takes a specified selection of views and applies them in order given. - async fn apply_views(&self, views: Vec) -> Result { + + async fn apply_views( + &self, + #[graphql( + desc = "Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result." + )] + views: Vec, + ) -> Result { let mut return_view: GqlEdges = self.update(self.ee.clone()); for view in views { return_view = match view { @@ -236,7 +311,9 @@ impl GqlEdges { Ok(return_view) } - /// Returns an edge object for each update within the original edge. + /// Expand each edge into one edge per update: if `A->B` has three updates, it + /// becomes three `A->B` entries each at a distinct timestamp. Use this to + /// iterate per-event rather than per-edge. async fn explode(&self) -> Self { self.update(self.ee.explode()) } @@ -248,8 +325,16 @@ impl GqlEdges { self.update(self.ee.explode_layers()) } - /// Specify a sort order from: source, destination, property, time. You can also reverse the ordering. - async fn sorted(&self, sort_bys: Vec) -> Self { + /// Sort the edges. Multiple criteria are applied lexicographically (ties + /// on the first key break to the second, etc.). + + async fn sorted( + &self, + #[graphql( + desc = "Ordered list of sort keys. Each entry chooses exactly one of `src` / `dst` / `time` / `property`, with an optional `reverse: true` to flip order." + )] + sort_bys: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || { let sorted: Arc<[_]> = self_clone @@ -342,28 +427,55 @@ impl GqlEdges { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. 
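+    ///
+    /// A usage sketch (hypothetical selection):
+    ///
+    /// ```text
+    /// edges { page(limit: 10, pageIndex: 3) { id } }   # items 30..40
+    /// ```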
+ async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone.iter().skip(start).take(limit).collect() }) - .await + .await) } /// Returns a list of all objects in the current selection of the collection. You should filter the collection first then call list. - async fn list(&self) -> Vec { + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.iter().collect()).await + Ok(blocking_compute(move || self_clone.iter().collect()).await) } - /// Returns a filtered view that applies to list down the chain - async fn filter(&self, expr: GqlEdgeFilter) -> Result { + /// Narrow the collection to edges matching `expr`. The filter sticks to the + /// returned view — every subsequent traversal through these edges (their + /// properties, their endpoints' neighbours, etc.) continues to see the + /// filtered scope. + /// + /// Useful when you want one scoping rule to apply across the whole query. + /// E.g. restricting everything to a specific week: + /// + /// ```text + /// edges { filter(expr: {window: {start: 1234, end: 5678}}) { + /// list { src { neighbours { list { name } } } } # neighbours still windowed + /// } } + /// ``` + /// + /// Contrast with `select`, which applies here and is not carried through. + + async fn filter( + &self, + #[graphql(desc = "Composite edge filter (by property, layer, src/dst, etc.).")] + expr: GqlEdgeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeEdgeFilter = expr.try_into()?; @@ -373,8 +485,30 @@ impl GqlEdges { .await } - /// Returns filtered list of edges - async fn select(&self, expr: GqlEdgeFilter) -> Result { + /// Narrow the collection to edges matching `expr`, but only at this step — + /// subsequent traversals out of these edges see the unfiltered graph again. + /// + /// Useful when you want different scopes at different hops. E.g. Monday's + /// edges, then the neighbours of their endpoints on Tuesday, then *those* + /// neighbours on Wednesday: + /// + /// ```text + /// edges { select(expr: {window: {...monday...}}) { + /// list { src { select(expr: {window: {...tuesday...}}) { + /// neighbours { select(expr: {window: {...wednesday...}}) { + /// neighbours { list { name } } + /// } } + /// } } } + /// } } + /// ``` + /// + /// Contrast with `filter`, which persists the scope through subsequent ops. 
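+    ///
+    /// The two compose (a sketch with hypothetical filters): `filter` pins a
+    /// global window, then `select` applies a one-hop refinement inside it:
+    ///
+    /// ```text
+    /// edges { filter(expr: {window: {...}}) {
+    ///   select(expr: {layers: {...}}) { list { id } }
+    /// } }
+    /// ```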
+ + async fn select( + &self, + #[graphql(desc = "Composite edge filter (by property, layer, src/dst, etc.).")] + expr: GqlEdgeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeEdgeFilter = expr.try_into()?; diff --git a/raphtory-graphql/src/model/graph/filtering.rs b/raphtory-graphql/src/model/graph/filtering.rs index fdbfd8187e..0309e7dc6f 100644 --- a/raphtory-graphql/src/model/graph/filtering.rs +++ b/raphtory-graphql/src/model/graph/filtering.rs @@ -1,4 +1,4 @@ -use crate::model::graph::{property::Value, timeindex::GqlTimeInput}; +use crate::model::graph::{node_id::GqlNodeId, property::Value, timeindex::GqlTimeInput}; use async_graphql::dynamic::ValueAccessor; use dynamic_graphql::{ internal::{ @@ -30,7 +30,9 @@ use raphtory::{ use raphtory_api::core::{ entities::{properties::prop::Prop, Layer, GID}, storage::timeindex::{AsTime, EventTime}, + utils::time::IntoTime, }; +use serde::{Deserialize, Serialize}; use std::{ borrow::Cow, collections::HashSet, @@ -40,7 +42,7 @@ use std::{ sync::Arc, }; -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct Window { /// Window start time. pub start: GqlTimeInput, @@ -59,11 +61,11 @@ pub enum GraphViewCollection { /// Single excluded layer. ExcludeLayer(String), /// Subgraph nodes. - Subgraph(Vec), + Subgraph(Vec), /// Subgraph node types. SubgraphNodeTypes(Vec), /// List of excluded nodes. - ExcludeNodes(Vec), + ExcludeNodes(Vec), /// Valid state. Valid(bool), /// Window between a start and end time. @@ -260,7 +262,8 @@ pub enum PathFromNodeViewCollection { ShrinkEnd(GqlTimeInput), } -#[derive(Enum, Copy, Clone, Debug)] +#[derive(Enum, Copy, Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub enum NodeField { /// Node ID field. /// @@ -303,7 +306,7 @@ impl Display for NodeField { /// ```graphql /// { Property: { name: "weight", where: { Gt: 0.5 } } } /// ``` -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct PropertyFilterNew { /// Property (or metadata) key. pub name: String, @@ -311,6 +314,7 @@ pub struct PropertyFilterNew { /// /// Exposed as `where` in GraphQL. #[graphql(name = "where")] + #[serde(rename = "where")] pub where_: PropCondition, } @@ -331,7 +335,8 @@ pub struct PropertyFilterNew { /// - `Value` is interpreted according to the property’s type. /// - Aggregators/qualifiers like `Sum` and `Len` apply when the underlying /// property is list-like or aggregatable (depending on your engine rules). -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub enum PropCondition { /// Equality: property value equals the given value. Eq(Value), @@ -448,7 +453,7 @@ impl PropCondition { /// ```graphql /// { Window: { start: 0, end: 10, expr: { Layers: { names: ["A"] } } } } /// ``` -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct GraphWindowExpr { /// Window start time (inclusive). pub start: GqlTimeInput, @@ -464,7 +469,7 @@ pub struct GraphWindowExpr { /// /// Example: /// `{ At: { time: 5, expr: { Layers: { names: ["L1"] } } } }` -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct GraphTimeExpr { /// Reference time for the operation. 
pub time: GqlTimeInput, @@ -475,7 +480,7 @@ pub struct GraphTimeExpr { /// Graph view restriction that takes only a nested expression. /// /// Used for unary view operations like `Latest` and `SnapshotLatest`. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct GraphUnaryExpr { /// Optional nested filter applied after the unary operation. pub expr: Option>, @@ -484,7 +489,7 @@ pub struct GraphUnaryExpr { /// Graph view restriction by layer membership, optionally chaining another `GraphFilter`. /// /// Used by `GqlGraphFilter::Layers`. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct GraphLayersExpr { /// Layer names to include. pub names: Vec, @@ -504,8 +509,9 @@ pub struct GraphLayersExpr { /// /// These filters can be nested via the `expr` field on the corresponding /// `*Expr` input objects to form pipelines. -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] #[graphql(name = "GraphFilter")] +#[serde(rename_all = "camelCase")] pub enum GqlGraphFilter { /// Restrict evaluation to a time window (inclusive start, exclusive end). Window(GraphWindowExpr), @@ -534,7 +540,8 @@ pub enum GqlGraphFilter { /// /// Supports comparisons, string predicates, and set membership. /// (Presence checks and aggregations are handled via property filters instead.) -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub enum NodeFieldCondition { /// Equality. Eq(Value), @@ -590,7 +597,7 @@ impl NodeFieldCondition { /// ```graphql /// { Node: { field: NodeName, where: { Contains: "ali" } } } /// ``` -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct NodeFieldFilterNew { /// Which built-in field to filter. pub field: NodeField, @@ -598,6 +605,7 @@ pub struct NodeFieldFilterNew { /// /// Exposed as `where` in GraphQL. #[graphql(name = "where")] + #[serde(rename = "where")] pub where_: NodeFieldCondition, } @@ -606,7 +614,7 @@ pub struct NodeFieldFilterNew { /// Used by `GqlNodeFilter::Window`. /// /// The window is inclusive of `start` and exclusive of `end`. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct NodeWindowExpr { /// Window start time (inclusive). pub start: GqlTimeInput, @@ -619,7 +627,7 @@ pub struct NodeWindowExpr { /// Restricts node evaluation to a single time bound and applies a nested `NodeFilter`. /// /// Used by `At`, `Before`, and `After` node filters. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct NodeTimeExpr { /// Reference time for the operation. pub time: GqlTimeInput, @@ -630,7 +638,7 @@ pub struct NodeTimeExpr { /// Applies a unary node-view operation and then evaluates a nested `NodeFilter`. /// /// Used by `Latest` and `SnapshotLatest` node filters. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct NodeUnaryExpr { /// Filter evaluated after applying the unary operation. pub expr: Wrapped, @@ -639,7 +647,7 @@ pub struct NodeUnaryExpr { /// Restricts node evaluation to one or more layers and applies a nested `NodeFilter`. /// /// Used by `GqlNodeFilter::Layers`. 
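+///
+/// Example (a sketch, mirroring the casing of the other examples in this file):
+///
+/// ```graphql
+/// { Layers: { names: ["L1"], expr: { Node: { field: NodeName, where: { Eq: "1" } } } } }
+/// ```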
-#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct NodeLayersExpr { /// Layer names to include. pub names: Vec, @@ -661,8 +669,9 @@ pub struct NodeLayersExpr { /// /// Filters can be combined recursively using logical operators /// (`And`, `Or`, `Not`). -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] #[graphql(name = "NodeFilter")] +#[serde(rename_all = "camelCase")] pub enum GqlNodeFilter { /// Filters a built-in node field (ID, name, or type). Node(NodeFieldFilterNew), @@ -718,7 +727,7 @@ pub enum GqlNodeFilter { /// Used by `GqlEdgeFilter::Window`. /// /// The window is inclusive of `start` and exclusive of `end`. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct EdgeWindowExpr { /// Window start time (inclusive). pub start: GqlTimeInput, @@ -731,7 +740,7 @@ pub struct EdgeWindowExpr { /// Restricts edge evaluation to a single time bound and applies a nested `EdgeFilter`. /// /// Used by `At`, `Before`, and `After` edge filters. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct EdgeTimeExpr { /// Reference time for the operation. pub time: GqlTimeInput, @@ -742,7 +751,7 @@ pub struct EdgeTimeExpr { /// Applies a unary edge-view operation and then evaluates a nested `EdgeFilter`. /// /// Used by `Latest` and `SnapshotLatest` edge filters. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct EdgeUnaryExpr { /// Filter evaluated after applying the unary operation. pub expr: Wrapped, @@ -751,7 +760,7 @@ pub struct EdgeUnaryExpr { /// Restricts edge evaluation to one or more layers and applies a nested `EdgeFilter`. /// /// Used by `GqlEdgeFilter::Layers`. -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct EdgeLayersExpr { /// Layer names to include. pub names: Vec, @@ -787,8 +796,9 @@ pub struct EdgeLayersExpr { /// } /// } /// ``` -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] #[graphql(name = "EdgeFilter")] +#[serde(rename_all = "camelCase")] pub enum GqlEdgeFilter { /// Applies a filter to the **source node** of the edge. 
///
@@ -903,7 +913,8 @@ pub enum GqlEdgeFilter {
     IsSelfLoop(bool),
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(transparent)]
 pub struct Wrapped<T>(Box<T>);

 impl<T> Deref for Wrapped<T> {
     type Target = T;
@@ -1416,13 +1427,15 @@ impl TryFrom<GqlNodeFilter> for CompositeNodeFilter {
         GqlNodeFilter::Window(w) => {
             let inner: CompositeNodeFilter = w.expr.deref().clone().try_into()?;
             Ok(CompositeNodeFilter::Windowed(Box::new(Windowed::new(
-                w.start.0, w.end.0, inner,
+                w.start.into_time(),
+                w.end.into_time(),
+                inner,
             ))))
         }
         GqlNodeFilter::At(t) => {
             let inner: CompositeNodeFilter = t.expr.deref().clone().try_into()?;
-            let et: EventTime = t.time.0;
+            let et = t.time.into_time();
             Ok(CompositeNodeFilter::Windowed(Box::new(Windowed::new(
                 et,
                 EventTime::end(et.t().saturating_add(1)),
@@ -1434,14 +1447,14 @@
             let inner: CompositeNodeFilter = t.expr.deref().clone().try_into()?;
             Ok(CompositeNodeFilter::Windowed(Box::new(Windowed::new(
                 EventTime::start(i64::MIN),
-                EventTime::end(t.time.0.t()),
+                EventTime::end(t.time.t()),
                 inner,
             ))))
         }
         GqlNodeFilter::After(t) => {
             let inner: CompositeNodeFilter = t.expr.deref().clone().try_into()?;
-            let start = EventTime::start(t.time.0.t().saturating_add(1));
+            let start = EventTime::start(t.time.t().saturating_add(1));
             Ok(CompositeNodeFilter::Windowed(Box::new(Windowed::new(
                 start,
                 EventTime::end(i64::MAX),
@@ -1459,7 +1472,7 @@
         GqlNodeFilter::SnapshotAt(t) => {
             let inner: CompositeNodeFilter = t.expr.deref().clone().try_into()?;
             Ok(CompositeNodeFilter::SnapshotAt(Box::new(
-                SnapshotAtWrap::new(t.time.0, inner),
+                SnapshotAtWrap::new(t.time.into_time(), inner),
             )))
         }
@@ -1581,13 +1594,15 @@ impl TryFrom<GqlEdgeFilter> for CompositeEdgeFilter {
         GqlEdgeFilter::Window(w) => {
             let inner: CompositeEdgeFilter = w.expr.deref().clone().try_into()?;
             Ok(CompositeEdgeFilter::Windowed(Box::new(Windowed::new(
-                w.start.0, w.end.0, inner,
+                w.start.into_time(),
+                w.end.into_time(),
+                inner,
             ))))
         }
         GqlEdgeFilter::At(t) => {
             let inner: CompositeEdgeFilter = t.expr.deref().clone().try_into()?;
-            let et: EventTime = t.time.0;
+            let et = t.time.into_time();
             Ok(CompositeEdgeFilter::Windowed(Box::new(Windowed::new(
                 et,
                 EventTime::end(et.t().saturating_add(1)),
@@ -1599,14 +1614,14 @@
             let inner: CompositeEdgeFilter = t.expr.deref().clone().try_into()?;
             Ok(CompositeEdgeFilter::Windowed(Box::new(Windowed::new(
                 EventTime::start(i64::MIN),
-                EventTime::end(t.time.0.t()),
+                EventTime::end(t.time.t()),
                 inner,
             ))))
         }
         GqlEdgeFilter::After(t) => {
             let inner: CompositeEdgeFilter = t.expr.deref().clone().try_into()?;
-            let start = EventTime::start(t.time.0.t().saturating_add(1));
+            let start = EventTime::start(t.time.t().saturating_add(1));
             Ok(CompositeEdgeFilter::Windowed(Box::new(Windowed::new(
                 start,
                 EventTime::end(i64::MAX),
@@ -1624,7 +1639,7 @@
         GqlEdgeFilter::SnapshotAt(t) => {
             let inner: CompositeEdgeFilter = t.expr.deref().clone().try_into()?;
             Ok(CompositeEdgeFilter::SnapshotAt(Box::new(
-                SnapshotAtWrap::new(t.time.0, inner),
+                SnapshotAtWrap::new(t.time.into_time(), inner),
             )))
         }
@@ -1734,3 +1749,23 @@ impl TryFrom for DynView {
         })
     }
 }
+
+/// Combined filter input covering all three filter levels (node, edge, graph-level).
+/// Used by `grantGraphFilteredReadOnly` to express a data-access restriction
+/// that is transparently applied whenever the role queries the graph.
+/// Use `and` / `or` to compose multiple sub-filters.
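+///
+/// A hypothetical composition (sketch only; restricts a role to layer "public"
+/// and to nodes named "alice"):
+///
+/// ```graphql
+/// { and: [ { graph: { layers: { names: ["public"] } } },
+///          { node: { node: { field: NodeName, where: { Eq: "alice" } } } } ] }
+/// ```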
+#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum GraphAccessFilter {
+    /// Filter by node properties, fields, or temporal state.
+    Node(GqlNodeFilter),
+    /// Filter by edge properties, source/destination, or temporal state.
+    Edge(GqlEdgeFilter),
+    /// Apply a graph-level view (window, snapshot, layer restriction, …).
+    Graph(GqlGraphFilter),
+    /// All sub-filters must pass (intersection).
+    And(Vec<GraphAccessFilter>),
+    /// At least one sub-filter must pass (union within each filter type;
+    /// cross-type sub-filters are applied as independent restrictions).
+    Or(Vec<GraphAccessFilter>),
+}
diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs
index 13bfccc535..3a76bece60 100644
--- a/raphtory-graphql/src/model/graph/graph.rs
+++ b/raphtory-graphql/src/model/graph/graph.rs
@@ -1,5 +1,6 @@
 use crate::{
     data::Data,
+    graph::GraphWithVectors,
     model::{
         graph::{
             edge::GqlEdge,
@@ -7,6 +8,7 @@ use crate::{
             filtering::{GqlEdgeFilter, GqlGraphFilter, GqlNodeFilter, GraphViewCollection},
             index::GqlIndexSpec,
             node::GqlNode,
+            node_id::GqlNodeId,
             nodes::GqlNodes,
             property::{GqlMetadata, GqlProperties},
             timeindex::{GqlEventTime, GqlTimeInput},
@@ -16,12 +18,15 @@ use crate::{
         plugins::graph_algorithm_plugin::GraphAlgorithmPlugin,
         schema::graph_schema::GraphSchema,
     },
-    paths::ExistingGraphFolder,
+    paths::{ExistingGraphFolder, PathValidationError, ValidGraphPaths},
     rayon::blocking_compute,
+    GQLError,
 };
 use async_graphql::Context;
-use dynamic_graphql::{ResolvedObject, ResolvedObjectFields};
+use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result};
 use itertools::Itertools;
+#[cfg(feature = "search")]
+use raphtory::db::api::view::SearchableGraphOps;
 use raphtory::{
     core::{
         entities::nodes::node_ref::{AsNodeRef, NodeRef},
@@ -32,7 +37,7 @@ use raphtory::{
             properties::dyn_props::DynProperties,
             view::{
                 filter_ops::NodeSelect, DynamicGraph, EdgeSelect, Filter, IntoDynamic, NodeViewOps,
-                SearchableGraphOps, StaticGraphViewOps, TimeOps,
+                StaticGraphViewOps, TimeOps,
             },
         },
         graph::{
@@ -43,7 +48,7 @@ use raphtory::{
             },
         },
     },
-    errors::{GraphError, InvalidPathReason},
+    errors::GraphError,
     prelude::*,
 };
 use raphtory_api::core::{storage::timeindex::AsTime, utils::time::IntoTime};
@@ -53,6 +58,9 @@ use std::{
     sync::Arc,
 };

+/// A view of a Raphtory graph. Every field here returns either data from the
+/// view or a derived view (`window`, `layer`, `at`, `filter`, ...) that you can
+/// keep chaining. Views are cheap — they don't copy the underlying data.
 #[derive(ResolvedObject, Clone)]
 #[graphql(name = "Graph")]
 pub(crate) struct GqlGraph {
@@ -60,6 +68,12 @@ pub(crate) struct GqlGraph {
     path: ExistingGraphFolder,
     graph: DynamicGraph,
 }

+impl From<GraphWithVectors> for GqlGraph {
+    fn from(value: GraphWithVectors) -> Self {
+        GqlGraph::new(value.folder, value.graph)
+    }
+}
+
 impl GqlGraph {
     pub fn new<G: StaticGraphViewOps + IntoDynamic>(path: ExistingGraphFolder, graph: G) -> Self {
         Self {
@@ -87,58 +101,101 @@ impl GqlGraph {
     ////////////////////////

     /// Returns the names of all layers in the graphview.
+    /// Distinct layer names observed in the current view — any layer that has at
+    /// least one edge event visible here. Excludes layers that exist elsewhere in
+    /// the graph but whose edges have been filtered out.
     async fn unique_layers(&self) -> Vec<String> {
         let self_clone = self.clone();
         blocking_compute(move || self_clone.graph.unique_layers().map_into().collect()).await
     }

-    /// Returns a view containing only the default layer.
+ /// View restricted to the default layer — where nodes and edges end up + /// when `addNode` / `addEdge` is called without a `layer` argument. + /// Useful for separating "unlayered" base-graph events from named-layer + /// ones. async fn default_layer(&self) -> GqlGraph { self.apply(|g| g.default_layer()) } - /// Returns a view containing all the specified layers. - async fn layers(&self, names: Vec) -> GqlGraph { + /// View restricted to the named layers. Updates on any other layer are hidden; + /// if that leaves a node or edge with no updates left, it disappears from the + /// view. + + async fn layers( + &self, + #[graphql(desc = "Layer names to include.")] names: Vec, + ) -> GqlGraph { let self_clone = self.clone(); blocking_compute(move || self_clone.apply(|g| g.valid_layers(names.clone()))).await } - /// Returns a view containing all layers except the specified excluded layers. - async fn exclude_layers(&self, names: Vec) -> GqlGraph { + /// View with the named layers hidden. Updates on those layers are removed; if + /// that leaves a node or edge with no updates left, it disappears from the + /// view. + + async fn exclude_layers( + &self, + #[graphql(desc = "Layer names to exclude.")] names: Vec, + ) -> GqlGraph { let self_clone = self.clone(); blocking_compute(move || self_clone.apply(|g| g.exclude_valid_layers(names.clone()))).await } - /// Returns a view containing the layer specified. - async fn layer(&self, name: String) -> GqlGraph { + /// View restricted to a single layer. Convenience form of + /// `layers(names: [name])` — updates on any other layer are hidden, and + /// entities with nothing left disappear. + + async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> GqlGraph { self.apply(|g| g.valid_layers(name.clone())) } - /// Returns a view containing all layers except the specified excluded layer. - async fn exclude_layer(&self, name: String) -> GqlGraph { + /// View with one layer hidden. Convenience form of + /// `excludeLayers(names: [name])` — updates on that layer are removed, and + /// entities with nothing left disappear. + + async fn exclude_layer( + &self, + #[graphql(desc = "Layer name to exclude.")] name: String, + ) -> GqlGraph { self.apply(|g| g.exclude_valid_layers(name.clone())) } - /// Returns a subgraph of a specified set of nodes which contains only the edges that connect nodes of the subgraph to each other. - async fn subgraph(&self, nodes: Vec) -> GqlGraph { + /// View restricted to a chosen set of nodes and the edges between them. Edges + /// connecting a selected node to a non-selected node are hidden. + + async fn subgraph( + &self, + #[graphql(desc = "Node ids to keep.")] nodes: Vec, + ) -> GqlGraph { let self_clone = self.clone(); blocking_compute(move || self_clone.apply(|g| g.subgraph(nodes.clone()))).await } - /// Returns a view of the graph that only includes valid edges. + /// View containing only valid edges — for persistent graphs this drops edges + /// whose most recent event is a deletion at the latest time of the current + /// view (a later re-addition would keep them). On event graphs this is a + /// no-op. async fn valid(&self) -> GqlGraph { self.apply(|g| g.valid()) } - /// Returns a subgraph filtered by the specified node types. - async fn subgraph_node_types(&self, node_types: Vec) -> GqlGraph { + /// View restricted to nodes with the given node types. 
+ + async fn subgraph_node_types( + &self, + #[graphql(desc = "Node types to include.")] node_types: Vec, + ) -> GqlGraph { let self_clone = self.clone(); blocking_compute(move || self_clone.apply(|g| g.subgraph_node_types(node_types.clone()))) .await } - /// Returns a subgraph containing all nodes except the specified excluded nodes. - async fn exclude_nodes(&self, nodes: Vec) -> GqlGraph { + /// View with a set of nodes removed (along with any edges touching them). + + async fn exclude_nodes( + &self, + #[graphql(desc = "Node ids to exclude.")] nodes: Vec, + ) -> GqlGraph { let self_clone = self.clone(); blocking_compute(move || { let nodes: Vec = nodes.iter().map(|v| v.as_node_ref()).collect(); @@ -156,10 +213,20 @@ impl GqlGraph { /// e.g. "1 month and 1 day" will align at the start of the day. /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before /// the start of the first window and/or after the end of the last window (i.e. not included in any window). + async fn rolling( &self, + #[graphql( + desc = "Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)." + )] window: WindowDuration, + #[graphql( + desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap." + )] step: Option, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)." + )] alignment_unit: Option, ) -> Result { let window = window.try_into_interval()?; @@ -179,9 +246,16 @@ impl GqlGraph { /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment. /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. /// e.g. "1 month and 1 day" will align at the start of the day. + async fn expanding( &self, + #[graphql( + desc = "How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`)." + )] step: WindowDuration, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`." + )] alignment_unit: Option, ) -> Result { let step = step.try_into_interval()?; @@ -194,14 +268,23 @@ impl GqlGraph { } /// Return a graph containing only the activity between start and end, by default raphtory stores times in milliseconds from the unix epoch. - async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> GqlGraph { + + async fn window( + &self, + #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput, + #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput, + ) -> GqlGraph { let start = start.into_time(); let end = end.into_time(); self.apply(|g| g.window(start, end)) } /// Creates a view including all events at a specified time. 
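+    /// Example (sketch): `at(time: 5)` keeps only events timestamped exactly 5;
+    /// compare `snapshotAt(time: 5)`, which keeps everything still valid at 5.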
-    async fn at(&self, time: GqlTimeInput) -> GqlGraph {
+
+    async fn at(
+        &self,
+        #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput,
+    ) -> GqlGraph {
         let time = time.into_time();
         self.apply(|g| g.at(time))
     }
@@ -213,7 +296,11 @@ impl GqlGraph {
     }
 
     /// Create a view including all events that are valid at the specified time.
-    async fn snapshot_at(&self, time: GqlTimeInput) -> GqlGraph {
+
+    async fn snapshot_at(
+        &self,
+        #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput,
+    ) -> GqlGraph {
         let time = time.into_time();
         self.apply(|g| g.snapshot_at(time))
     }
@@ -224,32 +311,62 @@ impl GqlGraph {
     }
 
     /// Create a view including all events before a specified end (exclusive).
-    async fn before(&self, time: GqlTimeInput) -> GqlGraph {
+
+    async fn before(
+        &self,
+        #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput,
+    ) -> GqlGraph {
         let time = time.into_time();
         self.apply(|g| g.before(time))
     }
 
     /// Create a view including all events after a specified start (exclusive).
-    async fn after(&self, time: GqlTimeInput) -> GqlGraph {
+
+    async fn after(
+        &self,
+        #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput,
+    ) -> GqlGraph {
         let time = time.into_time();
         self.apply(|g| g.after(time))
     }
 
-    /// Shrink both the start and end of the window.
-    async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self {
+    /// Shrink both the start and end of the window. The new bounds are taken as the
+    /// intersection with the current window; this never widens the view.
+
+    async fn shrink_window(
+        &self,
+        #[graphql(desc = "Proposed new start (TimeInput); ignored if before the current start.")]
+        start: GqlTimeInput,
+        #[graphql(desc = "Proposed new end (TimeInput); ignored if after the current end.")]
+        end: GqlTimeInput,
+    ) -> Self {
         let start = start.into_time();
         let end = end.into_time();
         self.apply(|g| g.shrink_window(start, end))
     }
 
     /// Set the start of the window to the larger of the specified value or current start.
-    async fn shrink_start(&self, start: GqlTimeInput) -> Self {
+
+    async fn shrink_start(
+        &self,
+        #[graphql(
+            desc = "Proposed new start (TimeInput); has no effect if it would widen the window."
+        )]
+        start: GqlTimeInput,
+    ) -> Self {
         let start = start.into_time();
         self.apply(|g| g.shrink_start(start))
     }
 
     /// Set the end of the window to the smaller of the specified value or current end.
-    async fn shrink_end(&self, end: GqlTimeInput) -> Self {
+
+    async fn shrink_end(
+        &self,
+        #[graphql(
+            desc = "Proposed new end (TimeInput); has no effect if it would widen the window."
+        )]
+        end: GqlTimeInput,
+    ) -> Self {
         let end = end.into_time();
         self.apply(|g| g.shrink_end(end))
     }
@@ -258,78 +375,93 @@ impl GqlGraph {
     //// TIME QUERIES //////
     ////////////////////////
 
-    /// Returns the timestamp for the creation of the graph.
-    async fn created(&self) -> Result<i64, GraphError> {
-        self.path.created_async().await
+    /// Filesystem creation timestamp (epoch millis) of the graph's on-disk folder
+    /// — i.e. when this graph was first saved to the server, not when its earliest
+    /// event occurred. Use `earliestTime` for the latter.
+    async fn created(&self) -> Result<i64> {
+        Ok(self.path.created_async().await?)
     }
 
     /// Returns the graph's last opened timestamp according to system time.
-    async fn last_opened(&self) -> Result<i64, GraphError> {
-        self.path.last_opened_async().await
+    async fn last_opened(&self) -> Result<i64> {
+        Ok(self.path.last_opened_async().await?)
     }
 
     /// Returns the graph's last updated timestamp.
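+    /// Like `created` and `lastOpened`, this reflects server-side filesystem
+    /// state rather than graph events. A hypothetical query reading all three
+    /// timestamps together:
+    /// ```graphql
+    /// {
+    ///   graph(path: "my_graph") {
+    ///     created
+    ///     lastOpened
+    ///     lastUpdated
+    ///   }
+    /// }
+    /// ```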
-    async fn last_updated(&self) -> Result<i64, GraphError> {
-        self.path.last_updated_async().await
+    async fn last_updated(&self) -> Result<i64> {
+        Ok(self.path.last_updated_async().await?)
     }
 
     /// Returns the time entry of the earliest activity in the graph.
-    async fn earliest_time(&self) -> GqlEventTime {
+    async fn earliest_time(&self) -> Result<GqlEventTime> {
         let self_clone = self.clone();
-        blocking_compute(move || self_clone.graph.earliest_time().into()).await
+        Ok(blocking_compute(move || self_clone.graph.earliest_time().into()).await)
     }
 
     /// Returns the time entry of the latest activity in the graph.
-    async fn latest_time(&self) -> GqlEventTime {
+    async fn latest_time(&self) -> Result<GqlEventTime> {
         let self_clone = self.clone();
-        blocking_compute(move || self_clone.graph.latest_time().into()).await
+        Ok(blocking_compute(move || self_clone.graph.latest_time().into()).await)
     }
 
     /// Returns the start time of the window. Errors if there is no window.
-    async fn start(&self) -> GqlEventTime {
-        self.graph.start().into()
+    async fn start(&self) -> Result<GqlEventTime> {
+        Ok(self.graph.start().into())
    }
 
     /// Returns the end time of the window. Errors if there is no window.
-    async fn end(&self) -> GqlEventTime {
-        self.graph.end().into()
+    async fn end(&self) -> Result<GqlEventTime> {
+        Ok(self.graph.end().into())
     }
 
-    /// Returns the earliest time that any edge in this graph is valid.
-    async fn earliest_edge_time(&self, include_negative: Option<bool>) -> GqlEventTime {
+    /// The earliest time at which any edge in this graph is valid.
+    ///
+    /// * `includeNegative` — if false, edge events with a timestamp `< 0` are
+    ///   skipped when computing the minimum. Defaults to true.
+    async fn earliest_edge_time(
+        &self,
+        #[graphql(
+            desc = "If false, edge events with a timestamp `< 0` are skipped when computing the minimum. Defaults to true."
+        )]
+        include_negative: Option<bool>,
+    ) -> Result<GqlEventTime> {
         let self_clone = self.clone();
-        blocking_compute(move || {
+        Ok(blocking_compute(move || {
             let include_negative = include_negative.unwrap_or(true);
-            let all_edges = self_clone
+            self_clone
                 .graph
                 .edges()
                 .earliest_time()
                 .into_iter()
                 .filter_map(|edge_time| edge_time.filter(|&time| include_negative || time.t() >= 0))
                 .min()
-                .into();
-            all_edges
+                .into()
         })
-        .await
+        .await)
     }
 
-    /// Returns the latest time that any edge in this graph is valid.
-    async fn latest_edge_time(&self, include_negative: Option<bool>) -> GqlEventTime {
+    /// The latest time at which any edge in this graph is valid.
+
+    async fn latest_edge_time(
+        &self,
+        #[graphql(
+            desc = "If false, edge events with a timestamp `< 0` are skipped when computing the maximum. Defaults to true."
+        )]
+        include_negative: Option<bool>,
+    ) -> Result<GqlEventTime> {
         let self_clone = self.clone();
-        blocking_compute(move || {
+        Ok(blocking_compute(move || {
             let include_negative = include_negative.unwrap_or(true);
-            let all_edges = self_clone
+            self_clone
                 .graph
                 .edges()
                 .latest_time()
                 .into_iter()
                 .filter_map(|edge_time| edge_time.filter(|&time| include_negative || time.t() >= 0))
                 .max()
-                .into();
-
-            all_edges
+                .into()
         })
-        .await
+        .await)
     }
 
     ////////////////////////
@@ -363,34 +495,57 @@ impl GqlGraph {
     //// EXISTS CHECKERS ///
     ////////////////////////
 
-    /// Returns true if the graph contains the specified node.
-    async fn has_node(&self, name: String) -> bool {
-        self.graph.has_node(name)
+    /// Returns true if a node with the given id exists in this view.
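+    /// View restrictions apply: a node hidden by the current window or layer
+    /// view is reported as absent. A hypothetical query sketch (node ids may
+    /// be strings or non-negative integers):
+    /// ```graphql
+    /// {
+    ///   graph(path: "my_graph") {
+    ///     hasNode(name: "alice")
+    ///     hasEdge(src: "alice", dst: "bob", layer: "works_with")
+    ///   }
+    /// }
+    /// ```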
+
+    async fn has_node(
+        &self,
+        #[graphql(desc = "Node id to look up.")] name: GqlNodeId,
+    ) -> Result<bool> {
+        Ok(self.graph.has_node(name))
     }
 
-    /// Returns true if the graph contains the specified edge. Edges are specified by providing a source and destination node id. You can restrict the search to a specified layer.
-    async fn has_edge(&self, src: String, dst: String, layer: Option<String>) -> bool {
-        match layer {
+    /// Returns true if an edge exists between `src` and `dst` in this view, optionally
+    /// restricted to a single layer.
+
+    async fn has_edge(
+        &self,
+        #[graphql(desc = "Source node id.")] src: GqlNodeId,
+        #[graphql(desc = "Destination node id.")] dst: GqlNodeId,
+        #[graphql(
+            desc = "Optional; if provided, only checks whether the edge exists on this layer. If null or omitted, any layer counts."
+        )]
+        layer: Option<String>,
+    ) -> Result<bool> {
+        Ok(match layer {
             Some(name) => self
                 .graph
                 .layers(name)
                 .map(|l| l.has_edge(src, dst))
                 .unwrap_or(false),
             None => self.graph.has_edge(src, dst),
-        }
+        })
     }
 
     ////////////////////////
     //////// GETTERS ///////
     ////////////////////////
 
-    /// Gets the node with the specified id.
-    async fn node(&self, name: String) -> Option<GqlNode> {
-        self.graph.node(name).map(|node| node.into())
+    /// Look up a single node by id. Returns null if the node doesn't exist in this
+    /// view.
+
+    async fn node(&self, #[graphql(desc = "Node id.")] name: GqlNodeId) -> Result<Option<GqlNode>> {
+        Ok(self.graph.node(name).map(|node| node.into()))
     }
 
-    /// Gets (optionally a subset of) the nodes in the graph.
-    async fn nodes(&self, select: Option) -> Result<GqlNodes> {
+    /// All nodes in this view, optionally narrowed by a filter.
+
+    async fn nodes(
+        &self,
+        #[graphql(
+            desc = "Optional node filter (by name, property, type, etc.). If omitted, every node in the view is returned."
+        )]
+        select: Option,
+    ) -> Result<GqlNodes> {
         let nn = self.graph.nodes();
 
         if let Some(sel) = select {
@@ -406,13 +561,26 @@ impl GqlGraph {
         Ok(GqlNodes::new(nn))
     }
 
-    /// Gets the edge with the specified source and destination nodes.
-    async fn edge(&self, src: String, dst: String) -> Option<GqlEdge> {
-        self.graph.edge(src, dst).map(|e| e.into())
+    /// Look up a single edge by its endpoint ids. Returns null if no edge exists
+    /// between `src` and `dst` in this view.
+
+    async fn edge(
+        &self,
+        #[graphql(desc = "Source node id.")] src: GqlNodeId,
+        #[graphql(desc = "Destination node id.")] dst: GqlNodeId,
+    ) -> Result<Option<GqlEdge>> {
+        Ok(self.graph.edge(src, dst).map(|e| e.into()))
     }
 
-    /// Gets the edges in the graph.
-    async fn edges<'a>(&self, select: Option) -> Result<GqlEdges> {
+    /// All edges in this view, optionally narrowed by a filter.
+
+    async fn edges<'a>(
+        &self,
+        #[graphql(
            desc = "Optional edge filter (by property, layer, src/dst, etc.). If omitted, every edge in the view is returned."
+        )]
+        select: Option,
+    ) -> Result<GqlEdges> {
         let base = self.graph.edges_unlocked();
 
         if let Some(sel) = select {
@@ -429,13 +597,13 @@ impl GqlGraph {
     ////////////////////////
 
     /// Returns the properties of the graph.
-    async fn properties(&self) -> GqlProperties {
-        Into::::into(self.graph.properties()).into()
+    async fn properties(&self) -> Result<GqlProperties> {
+        Ok(Into::::into(self.graph.properties()).into())
     }
 
     /// Returns the metadata of the graph.
-    async fn metadata(&self) -> GqlMetadata {
-        self.graph.metadata().into()
+    async fn metadata(&self) -> Result<GqlMetadata> {
+        Ok(self.graph.metadata().into())
     }
 
     ////////////////////////
@@ -446,48 +614,49 @@ impl GqlGraph {
     //if someone writes non-utf characters as a filename
 
     /// Returns the graph name.
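+    /// Typically the final component of the graph's path. A hypothetical query
+    /// reading the name alongside its path and namespace:
+    /// ```graphql
+    /// {
+    ///   graph(path: "some_namespace/my_graph") {
+    ///     name
+    ///     path
+    ///     namespace
+    ///   }
+    /// }
+    /// ```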
-    async fn name(&self) -> Result<String, GraphError> {
+    async fn name(&self) -> Result<String> {
         self.path.get_graph_name()
     }
 
     /// Returns path of graph.
-    async fn path(&self) -> Result<String, InvalidPathReason> {
-        Ok(self
-            .path
-            .get_original_path()
-            .to_str()
-            .ok_or(InvalidPathReason::PathNotParsable(
-                self.path.to_error_path(),
-            ))?
-            .to_owned())
+    async fn path(&self) -> String {
+        self.path.local_path().into()
     }
 
     /// Returns namespace of graph.
-    async fn namespace(&self) -> Result<String, InvalidPathReason> {
-        Ok(self
-            .path
-            .get_original_path()
-            .parent()
-            .and_then(|p| p.to_str().map(|s| s.to_string()))
-            .ok_or(InvalidPathReason::PathNotParsable(
-                self.path.to_error_path(),
-            ))?
-            .to_owned())
+    async fn namespace(&self) -> String {
+        self.path
+            .local_path()
+            .rsplit_once("/")
+            .map_or("", |(prefix, _)| prefix)
+            .to_string()
     }
 
     /// Returns the graph schema.
-    async fn schema(&self) -> GraphSchema {
+    async fn schema(&self) -> Result<GraphSchema> {
         let self_clone = self.clone();
-        blocking_compute(move || GraphSchema::new(&self_clone.graph)).await
+        Ok(blocking_compute(move || GraphSchema::new(&self_clone.graph)).await)
     }
 
+    /// Access registered graph algorithms (PageRank, shortest path, etc.) for this
+    /// graph view. The set of available algorithms is defined by the plugin registry
+    /// loaded at server startup.
     async fn algorithms(&self) -> GraphAlgorithmPlugin {
         self.graph.clone().into()
     }
 
-    async fn shared_neighbours(&self, selected_nodes: Vec<String>) -> Vec<GqlNode> {
+    /// Nodes that are neighbours of every node in `selectedNodes`. Returns the
+    /// intersection of each selected node's neighbour set (undirected).
+
+    async fn shared_neighbours(
+        &self,
+        #[graphql(
+            desc = "Node ids whose common neighbours you want. Returns an empty list if `selectedNodes` is empty or any id does not exist."
+        )]
+        selected_nodes: Vec<GqlNodeId>,
+    ) -> Result<Vec<GqlNode>> {
         let self_clone = self.clone();
-        blocking_compute(move || {
+        Ok(blocking_compute(move || {
             if selected_nodes.is_empty() {
                 return vec![];
             }
@@ -513,28 +682,39 @@ impl GqlGraph {
                 None => vec![],
             }
         })
-        .await
+        .await)
     }
 
-    /// Export all nodes and edges from this graph view to another existing graph
+    /// Copy all nodes and edges of the current graph view into another already-
+    /// existing graph stored on the server. The destination graph is preserved
+    /// — this only adds; it does not replace.
+
    async fn export_to<'a>(
        &self,
        ctx: &Context<'a>,
-        path: String,
-    ) -> Result<bool, Arc<GraphError>> {
+        #[graphql(desc = "Destination graph path relative to the root namespace.")] path: String,
+    ) -> Result<bool> {
        let data = ctx.data_unchecked::<Data>();
-        let other_g = data.get_graph(path.as_ref()).await?.0;
+        let other_g = data.get_graph(path.as_ref()).await?.graph;
        let g = self.graph.clone();
        blocking_compute(move || {
            other_g.import_nodes(g.nodes(), true)?;
            other_g.import_edges(g.edges(), true)?;
-            other_g.write_updates()?;
            Ok(true)
        })
        .await
    }
 
-    async fn filter(&self, expr: Option) -> Result<GqlGraph, GraphError> {
+    /// Returns a filtered view of the graph. Applies a mixed node/edge filter
+    /// expression and narrows nodes, edges, and their properties to what matches.
+
+    async fn filter(
+        &self,
+        #[graphql(
+            desc = "Optional composite filter combining node, edge, property, and metadata conditions. If omitted, applies the identity filter (equivalent to no filtering)."
+        )]
+        expr: Option,
+    ) -> Result<GqlGraph> {
         let self_clone = self.clone();
         blocking_compute(move || {
             let filter: DynView = match expr {
@@ -550,7 +730,14 @@ impl GqlGraph {
         .await
     }
 
-    async fn filter_nodes(&self, expr: GqlNodeFilter) -> Result<GqlGraph, GraphError> {
+    /// Returns a graph view restricted to nodes that match the given filter; edges
+    /// are kept only if both endpoints survive.
+
+    async fn filter_nodes(
+        &self,
+        #[graphql(desc = "Composite node filter (by name, property, type, etc.).")]
+        expr: GqlNodeFilter,
+    ) -> Result<GqlGraph> {
         let self_clone = self.clone();
         blocking_compute(move || {
             let filter: CompositeNodeFilter = expr.try_into()?;
@@ -563,7 +750,14 @@ impl GqlGraph {
         .await
     }
 
-    async fn filter_edges(&self, expr: GqlEdgeFilter) -> Result<GqlGraph, GraphError> {
+    /// Returns a graph view restricted to edges that match the given filter. Nodes
+    /// remain in the view even if all their edges are filtered out.
+
+    async fn filter_edges(
+        &self,
+        #[graphql(desc = "Composite edge filter (by property, layer, src/dst, etc.).")]
+        expr: GqlEdgeFilter,
+    ) -> Result<GqlGraph> {
         let self_clone = self.clone();
         blocking_compute(move || {
             let filter: CompositeEdgeFilter = expr.try_into()?;
@@ -600,15 +794,17 @@ impl GqlGraph {
         }
     }
 
-    /// (Experimental) Searches for nodes which match the given filter expression.
-    ///
-    /// Uses Tantivy's exact search.
+    /// (Experimental) Searches for nodes which match the given filter
+    /// expression. Uses Tantivy's exact search; requires the graph to have
+    /// been indexed.
+
     async fn search_nodes(
         &self,
+        #[graphql(desc = "Composite node filter (by name, property, type, etc.).")]
         filter: GqlNodeFilter,
-        limit: usize,
-        offset: usize,
-    ) -> Result<Vec<GqlNode>, GraphError> {
+        #[graphql(desc = "Maximum number of nodes to return.")] limit: usize,
+        #[graphql(desc = "Number of matches to skip before returning results.")] offset: usize,
+    ) -> Result<Vec<GqlNode>> {
         #[cfg(feature = "search")]
         {
             let self_clone = self.clone();
@@ -626,15 +822,17 @@ impl GqlGraph {
         }
     }
 
-    /// (Experimental) Searches the index for edges which match the given filter expression.
-    ///
-    /// Uses Tantivy's exact search.
+    /// (Experimental) Searches the index for edges which match the given
+    /// filter expression. Uses Tantivy's exact search; requires the graph to
+    /// have been indexed.
+
     async fn search_edges(
         &self,
+        #[graphql(desc = "Composite edge filter (by property, layer, src/dst, etc.).")]
         filter: GqlEdgeFilter,
-        limit: usize,
-        offset: usize,
-    ) -> Result<Vec<GqlEdge>, GraphError> {
+        #[graphql(desc = "Maximum number of edges to return.")] limit: usize,
+        #[graphql(desc = "Number of matches to skip before returning results.")] offset: usize,
+    ) -> Result<Vec<GqlEdge>> {
         #[cfg(feature = "search")]
         {
             let self_clone = self.clone();
@@ -652,9 +850,17 @@ impl GqlGraph {
         }
     }
 
-    /// Returns the specified graph view or if none is specified returns the default view.
-    /// This allows you to specify multiple operations together.
-    async fn apply_views(&self, views: Vec) -> Result<GqlGraph, GraphError> {
+    /// Apply a list of view operations in the given order and return the
+    /// resulting graph view. Lets callers compose multiple view transforms
+    /// (window, layer, filter, snapshot, ...) in a single call.
+
+    async fn apply_views(
+        &self,
+        #[graphql(
+            desc = "Ordered list of view operations; each entry is a one-of variant applied to the running result."
+        )]
+        views: Vec,
+    ) -> Result<GqlGraph> {
         let mut return_view: GqlGraph = GqlGraph::new(self.path.clone(), self.graph.clone());
         for view in views {
             return_view = match view {
diff --git a/raphtory-graphql/src/model/graph/history.rs b/raphtory-graphql/src/model/graph/history.rs
index 58c15c34d3..fbb629da22 100644
--- a/raphtory-graphql/src/model/graph/history.rs
+++ b/raphtory-graphql/src/model/graph/history.rs
@@ -1,8 +1,11 @@
 use crate::{
-    model::graph::timeindex::{dt_format_str_is_valid, GqlEventTime},
+    model::graph::{
+        collection::{check_list_allowed, check_page_limit},
+        timeindex::{dt_format_str_is_valid, GqlEventTime},
+    },
     rayon::blocking_compute,
 };
-use async_graphql::Error;
+use async_graphql::{Context, Error};
 use dynamic_graphql::{ResolvedObject, ResolvedObjectFields};
 use raphtory::db::api::view::history::{
     History, HistoryDateTime, HistoryEventId, HistoryTimestamp, InternalHistoryOps, Intervals,
@@ -50,15 +53,20 @@ impl GqlHistory {
     }
 
     /// List all time entries present in this history.
-    async fn list(&self) -> Vec<GqlEventTime> {
+    async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result<Vec<GqlEventTime>> {
+        check_list_allowed(ctx)?;
         let self_clone = self.clone();
-        blocking_compute(move || self_clone.history.iter().map(|t| t.into()).collect()).await
+        Ok(blocking_compute(move || self_clone.history.iter().map(|t| t.into()).collect()).await)
     }
 
     /// List all time entries present in this history in reverse order.
-    async fn list_rev(&self) -> Vec<GqlEventTime> {
+    async fn list_rev(&self, ctx: &Context<'_>) -> async_graphql::Result<Vec<GqlEventTime>> {
+        check_list_allowed(ctx)?;
         let self_clone = self.clone();
-        blocking_compute(move || self_clone.history.iter_rev().map(|t| t.into()).collect()).await
+        Ok(
+            blocking_compute(move || self_clone.history.iter_rev().map(|t| t.into()).collect())
+                .await,
+        )
     }
 
     /// Fetch one page of EventTime entries with a number of items up to a specified limit,
     /// optionally offset by a specified amount.
     ///
     /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1),
     /// will be returned.
+
     async fn page(
         &self,
-        limit: usize,
+        ctx: &Context<'_>,
+        #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize,
+        #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")]
         offset: Option<usize>,
+        #[graphql(
+            desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)."
+        )]
         page_index: Option<usize>,
-    ) -> Vec<GqlEventTime> {
+    ) -> async_graphql::Result<Vec<GqlEventTime>> {
+        check_page_limit(ctx, limit)?;
         let self_clone = self.clone();
-        blocking_compute(move || {
+        Ok(blocking_compute(move || {
             let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0);
             self_clone
                 .history
@@ -83,7 +98,7 @@
                 .map(|t| t.into())
                 .collect()
         })
-        .await
+        .await)
     }
 
     /// Fetch one page of EventTime entries with a number of items up to a specified limit,
     /// optionally offset by a specified amount.
     ///
     /// For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1),
     /// will be returned.
+
     async fn page_rev(
         &self,
-        limit: usize,
+        ctx: &Context<'_>,
+        #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize,
+        #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")]
         offset: Option<usize>,
+        #[graphql(
+            desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)."
+ )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .history @@ -108,7 +130,7 @@ impl GqlHistory { .map(|t| t.into()) .collect() }) - .await + .await) } /// Returns True if the history is empty. @@ -137,7 +159,14 @@ impl GqlHistory { /// Useful for converting millisecond timestamps into easily readable datetime strings. /// Optionally, a format string can be passed to format the output. Defaults to RFC 3339 if not provided (e.g., "2023-12-25T10:30:45.123Z"). /// Refer to chrono::format::strftime for formatting specifiers and escape sequences. - async fn datetimes(&self, format_string: Option) -> GqlHistoryDateTime { + + async fn datetimes( + &self, + #[graphql( + desc = "Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted." + )] + format_string: Option, + ) -> GqlHistoryDateTime { let self_clone = self.clone(); blocking_compute(move || GqlHistoryDateTime { history_dt: HistoryDateTime::new(self_clone.history.0.clone()), // clone the Arc, not the underlying object @@ -156,7 +185,10 @@ impl GqlHistory { .await } - /// Returns an Intervals object which calculates the intervals between consecutive EventTime timestamps. + /// Inter-event gap analysis for this history. The returned `Intervals` + /// object exposes each gap (in milliseconds) between consecutive events, + /// plus summary statistics — `min` / `max` / `mean` / `median` — and + /// paginated access via `list` / `listRev` / `page` / `pageRev`. async fn intervals(&self) -> GqlIntervals { let self_clone = self.clone(); blocking_compute(move || GqlIntervals { @@ -176,15 +208,17 @@ pub struct GqlHistoryTimestamp { #[ResolvedObjectFields] impl GqlHistoryTimestamp { /// List all timestamps. - async fn list(&self) -> Vec { + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.history_t.collect()).await + Ok(blocking_compute(move || self_clone.history_t.collect()).await) } /// List all timestamps in reverse order. - async fn list_rev(&self) -> Vec { + async fn list_rev(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.history_t.collect_rev()).await + Ok(blocking_compute(move || self_clone.history_t.collect_rev()).await) } /// Fetch one page of timestamps with a number of items up to a specified limit, optionally offset by a specified amount. @@ -192,14 +226,21 @@ impl GqlHistoryTimestamp { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." 
+ )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .history_t @@ -208,7 +249,7 @@ impl GqlHistoryTimestamp { .take(limit) .collect() }) - .await + .await) } /// Fetch one page of timestamps in reverse order with a number of items up to a specified limit, @@ -216,14 +257,21 @@ impl GqlHistoryTimestamp { /// /// For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page_rev( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .history_t @@ -232,7 +280,7 @@ impl GqlHistoryTimestamp { .take(limit) .collect() }) - .await + .await) } } @@ -249,7 +297,16 @@ impl GqlHistoryDateTime { /// List all datetimes formatted as strings. /// If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError /// will be raised on time conversion error. Defaults to False. - async fn list(&self, filter_broken: Option) -> Result, Error> { + + async fn list( + &self, + ctx: &Context<'_>, + #[graphql( + desc = "If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false." + )] + filter_broken: Option, + ) -> Result, Error> { + check_list_allowed(ctx)?; let self_clone = self.clone(); blocking_compute(move || { let fmt_string = self_clone.format_string.as_deref().unwrap_or("%+"); // %+ is RFC 3339 @@ -280,7 +337,16 @@ impl GqlHistoryDateTime { /// List all datetimes formatted as strings in reverse chronological order. /// If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError /// will be raised on time conversion error. Defaults to False. - async fn list_rev(&self, filter_broken: Option) -> Result, Error> { + + async fn list_rev( + &self, + ctx: &Context<'_>, + #[graphql( + desc = "If true, ignore unconvertible timestamps; if false, raise an error on the first conversion failure. Defaults to false." + )] + filter_broken: Option, + ) -> Result, Error> { + check_list_allowed(ctx)?; let self_clone = self.clone(); blocking_compute(move || { let fmt_string = self_clone.format_string.as_deref().unwrap_or("%+"); // %+ is RFC 3339 @@ -315,13 +381,23 @@ impl GqlHistoryDateTime { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." 
+ )] page_index: Option, + #[graphql( + desc = "If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false." + )] filter_broken: Option, ) -> Result, Error> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); @@ -360,13 +436,23 @@ impl GqlHistoryDateTime { /// /// For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page_rev( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, + #[graphql( + desc = "If true, skip timestamps whose conversion fails; if false, raise an error on the first conversion failure. Defaults to false." + )] filter_broken: Option, ) -> Result, Error> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); @@ -409,29 +495,31 @@ pub struct GqlHistoryEventId { #[ResolvedObjectFields] impl GqlHistoryEventId { /// List event ids. - async fn list(&self) -> Vec { + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { self_clone .history_s .iter() .map(|s: usize| s as u64) .collect() }) - .await + .await) } /// List event ids in reverse order. - async fn list_rev(&self) -> Vec { + async fn list_rev(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { self_clone .history_s .iter_rev() .map(|s: usize| s as u64) .collect() }) - .await + .await) } /// Fetch one page of event ids with a number of items up to a specified limit, @@ -439,14 +527,21 @@ impl GqlHistoryEventId { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .history_s @@ -456,7 +551,7 @@ impl GqlHistoryEventId { .map(|s: usize| s as u64) .collect() }) - .await + .await) } /// Fetch one page of event ids in reverse chronological order with a number of items up to a specified limit, @@ -464,14 +559,21 @@ impl GqlHistoryEventId { /// /// For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. 
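+    ///
+    /// A hypothetical selection sketch: with these arguments the page skips
+    /// `5 * 2 + 1 = 11` of the most recent event ids and then returns up to 5
+    /// (the field is selected on whatever parent object exposes this history):
+    /// ```graphql
+    /// pageRev(limit: 5, pageIndex: 2, offset: 1)
+    /// ```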
+ async fn page_rev( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .history_s @@ -481,7 +583,7 @@ impl GqlHistoryEventId { .map(|s: usize| s as u64) .collect() }) - .await + .await) } } @@ -495,15 +597,17 @@ pub struct GqlIntervals { #[ResolvedObjectFields] impl GqlIntervals { /// List time intervals between consecutive timestamps in milliseconds. - async fn list(&self) -> Vec { + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.intervals.collect()).await + Ok(blocking_compute(move || self_clone.intervals.collect()).await) } /// List millisecond time intervals between consecutive timestamps in reverse order. - async fn list_rev(&self) -> Vec { + async fn list_rev(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.intervals.collect_rev()).await + Ok(blocking_compute(move || self_clone.intervals.collect_rev()).await) } /// Fetch one page of intervals between consecutive timestamps with a number of items up to a specified limit, @@ -511,14 +615,21 @@ impl GqlIntervals { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .intervals @@ -527,7 +638,7 @@ impl GqlIntervals { .take(limit) .collect() }) - .await + .await) } /// Fetch one page of intervals between consecutive timestamps in reverse order with a number of items up to a specified limit, @@ -535,14 +646,21 @@ impl GqlIntervals { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page_rev( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." 
+ )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .intervals @@ -551,7 +669,7 @@ impl GqlIntervals { .take(limit) .collect() }) - .await + .await) } /// Compute the mean interval between consecutive timestamps. Returns None if fewer than 1 timestamp. diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 72316bf2b2..30aac5eff9 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -1,10 +1,22 @@ -use crate::{model::graph::property::GqlProperty, paths::ExistingGraphFolder}; -use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; -use raphtory::{errors::GraphError, serialise::metadata::GraphMetadata}; +use crate::{ + data::Data, + model::graph::property::GqlProperty, + paths::{ExistingGraphFolder, ValidGraphPaths}, +}; +use async_graphql::Context; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; +use raphtory::{ + db::api::storage::storage::{Extension, PersistenceStrategy}, + prelude::{GraphViewOps, PropertiesOps}, + serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}, +}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; -/// +/// Lightweight summary of a stored graph — its name, path, counts, and +/// filesystem timestamps — served without deserializing the full graph. +/// Useful for listing what's available on the server before committing to a +/// full load. #[derive(ResolvedObject, Clone)] pub(crate) struct MetaGraph { folder: ExistingGraphFolder, @@ -39,10 +51,15 @@ impl MetaGraph { } } - async fn meta(&self) -> Result<&GraphMetadata, GraphError> { - self.meta + pub(crate) fn local_path(&self) -> &str { + self.folder.local_path() + } + + async fn meta(&self) -> Result<&GraphMetadata> { + Ok(self + .meta .get_or_try_init(|| self.folder.read_metadata_async()) - .await + .await?) } } @@ -56,26 +73,26 @@ impl MetaGraph { /// Returns path of graph. async fn path(&self) -> String { - self.folder.get_original_path_str().to_owned() + self.folder.local_path().into() } /// Returns the timestamp for the creation of the graph. - async fn created(&self) -> Result { - self.folder.created_async().await + async fn created(&self) -> Result { + Ok(self.folder.created_async().await?) } /// Returns the graph's last opened timestamp according to system time. - async fn last_opened(&self) -> Result { - self.folder.last_opened_async().await + async fn last_opened(&self) -> Result { + Ok(self.folder.last_opened_async().await?) } /// Returns the graph's last updated timestamp. - async fn last_updated(&self) -> Result { - self.folder.last_updated_async().await + async fn last_updated(&self) -> Result { + Ok(self.folder.last_updated_async().await?) } /// Returns the number of nodes in the graph. - async fn node_count(&self) -> Result { + async fn node_count(&self) -> Result { Ok(self.meta().await?.node_count) } @@ -83,18 +100,31 @@ impl MetaGraph { /// /// Returns: /// int: - async fn edge_count(&self) -> Result { + async fn edge_count(&self) -> Result { Ok(self.meta().await?.edge_count) } /// Returns the metadata of the graph. - async fn metadata(&self) -> Result, GraphError> { - Ok(self - .meta() - .await? 
- .metadata - .iter() - .map(|(key, prop)| GqlProperty::new(key.to_string(), prop.clone())) - .collect()) + async fn metadata(&self, ctx: &Context<'_>) -> Result> { + let data: &Data = ctx.data_unchecked(); + let maybe_cached = if Extension::disk_storage_enabled() { + let graph = data.get_graph(self.folder.local_path()).await?; + Some(graph) + } else { + data.get_cached_graph(self.folder.local_path()).await + }; + let res = match maybe_cached { + None => decode_graph_metadata(self.folder.graph_folder())? + .into_iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) + .collect(), + Some(graph) => graph + .graph + .metadata() + .iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key.into(), prop))) + .collect(), + }; + Ok(res) } } diff --git a/raphtory-graphql/src/model/graph/mod.rs b/raphtory-graphql/src/model/graph/mod.rs index a056b01236..1791b75cfe 100644 --- a/raphtory-graphql/src/model/graph/mod.rs +++ b/raphtory-graphql/src/model/graph/mod.rs @@ -6,15 +6,16 @@ pub(crate) mod collection; mod document; pub(crate) mod edge; mod edges; -pub(crate) mod filtering; +pub mod filtering; pub(crate) mod graph; pub(crate) mod history; pub(crate) mod index; pub(crate) mod meta_graph; pub(crate) mod mutable_graph; pub(crate) mod namespace; -mod namespaced_item; +pub(crate) mod namespaced_item; pub(crate) mod node; +pub(crate) mod node_id; mod nodes; mod path_from_node; pub(crate) mod property; diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 49eeeb1356..2cd2537398 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -1,17 +1,25 @@ use crate::{ graph::{GraphWithVectors, UpdateEmbeddings}, - model::graph::{edge::GqlEdge, graph::GqlGraph, node::GqlNode, property::Value}, - paths::ExistingGraphFolder, + model::{ + graph::{ + edge::GqlEdge, graph::GqlGraph, node::GqlNode, node_id::GqlNodeId, property::Value, + timeindex::GqlTimeInput, + }, + GqlGraphType, + }, rayon::blocking_write, }; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; use itertools::Itertools; use raphtory::{ - db::graph::{edge::EdgeView, node::NodeView}, + db::{ + api::view::MaterializedGraph, + graph::{edge::EdgeView, node::NodeView}, + }, errors::GraphError, prelude::*, }; -use raphtory_api::core::storage::arc_str::OptionAsStr; +use raphtory_api::core::{storage::arc_str::OptionAsStr, utils::time::IntoTime}; use std::{ error::Error, fmt::{Debug, Display, Formatter}, @@ -78,30 +86,33 @@ pub struct GqlPropertyInput { #[derive(InputObject, Clone)] pub struct TemporalPropertyInput { - /// Time. - time: i64, + /// Time of the update — accepts the same forms as `TimeInput` (epoch + /// millis Int, RFC3339 string, or `{timestamp, eventId}` object). + time: GqlTimeInput, /// Properties. properties: Option>, } #[derive(InputObject, Clone)] pub struct NodeAddition { - /// Name. - name: String, + /// Node id (string or non-negative integer). + name: GqlNodeId, /// Node type. node_type: Option, /// Metadata. metadata: Option>, /// Updates. updates: Option>, + /// Layer. + layer: Option, } #[derive(InputObject, Clone)] pub struct EdgeAddition { - /// Source node. - src: String, - /// Destination node. - dst: String, + /// Source node id (string or non-negative integer). + src: GqlNodeId, + /// Destination node id (string or non-negative integer). + dst: GqlNodeId, /// Layer. layer: Option, /// Metadata. 
@@ -110,25 +121,26 @@ pub struct EdgeAddition { updates: Option>, } +/// Write-enabled handle for a graph. Obtained by calling `updateGraph(path)` +/// on the root query with a path you have write permission for. Supports +/// adding nodes and edges (individually or in batches), attaching +/// properties/metadata, and looking up mutable `node`/`edge` handles. Use the +/// read-only `graph(path)` resolver for queries. #[derive(ResolvedObject, Clone)] #[graphql(name = "MutableGraph")] pub struct GqlMutableGraph { - path: ExistingGraphFolder, graph: GraphWithVectors, } -impl GqlMutableGraph { - pub(crate) fn new(path: ExistingGraphFolder, graph: GraphWithVectors) -> Self { - Self { - path: path.into(), - graph, - } +impl From for GqlMutableGraph { + fn from(graph: GraphWithVectors) -> Self { + Self { graph } } } fn as_properties( properties: Vec, -) -> Result, GraphError> { +) -> Result, GraphError> { let props: Result, GraphError> = properties .into_iter() .map(|p| { @@ -142,62 +154,127 @@ fn as_properties( #[ResolvedObjectFields] impl GqlMutableGraph { - /// Get the non-mutable graph. - async fn graph(&self) -> GqlGraph { - GqlGraph::new(self.path.clone(), self.graph.graph.clone()) + /// Read-only view of this graph — identical to what you'd get from + /// `graph(path:)` on the query root. Use this when you want to compose + /// queries on the graph you've just mutated. `graphType` lets you + /// re-interpret the graph at query time (see `graph(path:)` for + /// semantics); defaults to the stored graph's native type. + async fn graph( + &self, + #[graphql( + desc = "Optional override for graph semantics — `EVENT` treats every update as a point-in-time event, `PERSISTENT` carries values forward until overwritten or deleted. Defaults to the stored graph's native type." + )] + graph_type: Option, + ) -> GqlGraph { + let folder = self.graph.folder.clone(); + match graph_type { + Some(GqlGraphType::Event) => match self.graph.graph.clone() { + MaterializedGraph::EventGraph(g) => GqlGraph::new(folder, g), + MaterializedGraph::PersistentGraph(g) => GqlGraph::new(folder, g.event_graph()), + }, + Some(GqlGraphType::Persistent) => match self.graph.graph.clone() { + MaterializedGraph::EventGraph(g) => GqlGraph::new(folder, g.persistent_graph()), + MaterializedGraph::PersistentGraph(g) => GqlGraph::new(folder, g), + }, + None => GqlGraph::new(folder, self.graph.graph.clone()), + } } - /// Get mutable existing node. - async fn node(&self, name: String) -> Option { - self.graph.node(name).map(|n| n.into()) + /// Look up an existing node for mutation. Returns null if the node doesn't + /// exist; use `addNode` or `createNode` to create one. + + async fn node(&self, #[graphql(desc = "Node id.")] name: GqlNodeId) -> Option { + self.graph.node(name).map(|n| GqlMutableNode::new(n)) } - /// Add a new node or add updates to an existing node. + /// Add a new node or append an update to an existing one. Upsert semantics: + /// no error if the node already exists — properties and type are merged. + async fn add_node( &self, - time: i64, - name: String, - properties: Option>, + #[graphql(desc = "Time of the event.")] time: GqlTimeInput, + #[graphql(desc = "Node id.")] name: GqlNodeId, + #[graphql(desc = "Optional property updates attached to this event.")] properties: Option< + Vec, + >, + #[graphql( + desc = "Optional node type to assign. If provided, sets the node's type at this event." + )] node_type: Option, + #[graphql(desc = "Optional layer name. 
If omitted, the default layer is used.")] + layer: Option, ) -> Result { let self_clone = self.clone(); let node = blocking_write(move || { let prop_iter = as_properties(properties.unwrap_or(vec![]))?; - let node = self_clone - .graph - .add_node(time, &name, prop_iter, node_type.as_str())?; - self_clone.graph.write_updates()?; + let node = self_clone.graph.add_node( + time.into_input_time(), + &name, + prop_iter, + node_type.as_str(), + layer.as_str(), + )?; + Ok::<_, GraphError>(node) }) .await?; + + self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(node.into()) + + Ok(GqlMutableNode::new(node)) } - /// Create a new node or fail if it already exists. + /// Create a new node or fail if it already exists. Strict alternative to + /// `addNode` — use this when you want to detect collisions. + async fn create_node( &self, - time: i64, - name: String, - properties: Option>, + #[graphql(desc = "Time of the create event.")] time: GqlTimeInput, + #[graphql(desc = "Node id.")] name: GqlNodeId, + #[graphql(desc = "Optional property updates attached to this event.")] properties: Option< + Vec, + >, + #[graphql( + desc = "Optional node type to assign. If provided, sets the node's type at this event." + )] node_type: Option, + #[graphql(desc = "Optional layer name. If omitted, the default layer is used.")] + layer: Option, ) -> Result { let self_clone = self.clone(); let node = blocking_write(move || { let prop_iter = as_properties(properties.unwrap_or(vec![]))?; - let node = self_clone - .graph - .create_node(time, &name, prop_iter, node_type.as_str())?; - self_clone.graph.write_updates()?; + let node = self_clone.graph.create_node( + time.into_input_time(), + &name, + prop_iter, + node_type.as_str(), + layer.as_str(), + )?; + Ok::<_, GraphError>(node) }) .await?; + + self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(node.into()) + + Ok(GqlMutableNode::new(node)) } - /// Add a batch of nodes. - async fn add_nodes(&self, nodes: Vec) -> Result { + /// Batch-add multiple nodes in one call. For each `NodeAddition`, applies every + /// update it carries (time/properties pairs), then optionally sets its node type + /// and adds any metadata. On partial failure, returns a `BatchFailures` error + /// describing which entries failed and why; otherwise returns true. + + async fn add_nodes( + &self, + #[graphql( + desc = "List of `NodeAddition` inputs, each specifying a node's name, optional type, layer, per-timestamp updates, and metadata." 
+ )] + nodes: Vec, + ) -> Result { let self_clone = self.clone(); let (succeeded, batch_failures) = blocking_write(move || { @@ -205,13 +282,19 @@ impl GqlMutableGraph { .iter() .map(|node| { let node = node.clone(); - let name = node.name.as_str(); + let name = &node.name; + let node_type = node.node_type.as_str(); + let layer = node.layer.as_str(); for prop in node.updates.unwrap_or(vec![]) { let prop_iter = as_properties(prop.properties.unwrap_or(vec![]))?; - self_clone - .graph - .add_node(prop.time, name, prop_iter, None)?; + self_clone.graph.add_node( + prop.time.into_input_time(), + name, + prop_iter, + node_type, + layer, + )?; } if let Some(node_type) = node.node_type.as_str() { self_clone.get_node_view(name)?.set_node_type(node_type)?; @@ -224,10 +307,13 @@ impl GqlMutableGraph { self_clone.get_node_view(name) }) .collect(); - let write_res = self_clone.graph.write_updates(); - split_failures(nodes, write_res) + + split_failures(nodes, Ok(())) }) .await; + + self.post_mutation_ops().await; + // Generate embeddings let _ = self.graph.update_node_embeddings(succeeded).await; if let Some(failures) = batch_failures { @@ -237,165 +323,243 @@ impl GqlMutableGraph { } } - /// Get a mutable existing edge. - async fn edge(&self, src: String, dst: String) -> Option { - self.graph.edge(src, dst).map(|e| e.into()) + /// Look up an existing edge for mutation. Returns null if no such edge exists. + + async fn edge( + &self, + #[graphql(desc = "Source node id.")] src: GqlNodeId, + #[graphql(desc = "Destination node id.")] dst: GqlNodeId, + ) -> Option { + self.graph.edge(src, dst).map(|e| GqlMutableEdge::new(e)) } - /// Add a new edge or add updates to an existing edge. + /// Add a new edge or append an update to an existing one. Upsert semantics: + /// safe to call on an edge that already exists — creates missing endpoints if + /// needed. + async fn add_edge( &self, - time: i64, - src: String, - dst: String, - properties: Option>, + #[graphql(desc = "Time of the event.")] time: GqlTimeInput, + #[graphql(desc = "Source node id.")] src: GqlNodeId, + #[graphql(desc = "Destination node id.")] dst: GqlNodeId, + #[graphql(desc = "Optional property updates attached to this event.")] properties: Option< + Vec, + >, + #[graphql(desc = "Optional layer name. If omitted, the default layer is used.")] layer: Option, ) -> Result { let self_clone = self.clone(); let edge = blocking_write(move || { let prop_iter = as_properties(properties.unwrap_or(vec![]))?; - let edge = self_clone - .graph - .add_edge(time, src, dst, prop_iter, layer.as_str())?; - self_clone.graph.write_updates()?; + let edge = self_clone.graph.add_edge( + time.into_input_time(), + src, + dst, + prop_iter, + layer.as_str(), + )?; + Ok::<_, GraphError>(edge) }) .await?; + + self.post_mutation_ops().await; let _ = edge.update_embeddings().await; - Ok(edge.into()) + + Ok(GqlMutableEdge::new(edge)) } - /// Add a batch of edges. - async fn add_edges(&self, edges: Vec) -> Result { + /// Batch-add multiple edges in one call. For each `EdgeAddition`, applies every + /// update it carries, then adds any metadata. On partial failure, returns a + /// `BatchFailures` error describing which entries failed; otherwise returns + /// true. + + async fn add_edges( + &self, + #[graphql( + desc = "List of `EdgeAddition` inputs, each specifying an edge's `src`, `dst`, optional layer, per-timestamp updates, and metadata." 
+        )]
+        edges: Vec<EdgeAddition>,
+    ) -> Result {
         let self_clone = self.clone();
 
         let (edge_pairs, failures) = blocking_write(move || {
             let edge_res: Vec<_> = edges
                 .into_iter()
                 .map(|edge| {
-                    let src = edge.src.as_str();
-                    let dst = edge.dst.as_str();
+                    let src = &edge.src;
+                    let dst = &edge.dst;
                     let layer = edge.layer.as_str();
                     for prop in edge.updates.unwrap_or(vec![]) {
                         let prop_iter = as_properties(prop.properties.unwrap_or(vec![]))?;
-                        self_clone
-                            .graph
-                            .add_edge(prop.time, src, dst, prop_iter, layer)?;
+                        self_clone.graph.add_edge(
+                            prop.time.into_input_time(),
+                            src,
+                            dst,
+                            prop_iter,
+                            layer,
+                        )?;
                     }
                     let metadata = edge.metadata.unwrap_or(vec![]);
                     if !metadata.is_empty() {
                         let prop_iter = as_properties(metadata)?;
                         self_clone
-                            .get_edge_view(src.to_string(), dst.to_string())?
+                            .get_edge_view(src, dst)?
                             .add_metadata(prop_iter, layer)?;
                     }
                     Ok((edge.src, edge.dst))
                 })
                 .collect();
-            let write_res = self_clone.graph.write_updates();
-            split_failures(edge_res, write_res)
+
+            split_failures(edge_res, Ok(()))
         })
         .await;
+        self.post_mutation_ops().await;
         let _ = self.graph.update_edge_embeddings(edge_pairs).await;
+
         match failures {
             None => Ok(true),
             Some(failures) => Err(failures),
         }
     }
 
-    /// Mark an edge as deleted (creates the edge if it did not exist).
+    /// Mark an edge as deleted at the given time. Persistent graphs treat this
+    /// as a tombstone (the edge becomes invalid from `time` onwards); event
+    /// graphs simply log the deletion event. Creates the edge first if it did
+    /// not exist.
+
     async fn delete_edge(
         &self,
-        time: i64,
-        src: String,
-        dst: String,
+        #[graphql(desc = "Time of the deletion.")] time: GqlTimeInput,
+        #[graphql(desc = "Source node id.")] src: GqlNodeId,
+        #[graphql(desc = "Destination node id.")] dst: GqlNodeId,
+        #[graphql(desc = "Optional layer name. If omitted, the default layer is used.")]
         layer: Option<String>,
     ) -> Result {
         let self_clone = self.clone();
         let edge = blocking_write(move || {
-            let edge = self_clone
-                .graph
-                .delete_edge(time, src, dst, layer.as_str())?;
-            self_clone.graph.write_updates()?;
+            let edge =
+                self_clone
+                    .graph
+                    .delete_edge(time.into_input_time(), src, dst, layer.as_str())?;
+
             Ok::<_, GraphError>(edge)
         })
         .await?;
+
+        self.post_mutation_ops().await;
         let _ = edge.update_embeddings().await;
-        Ok(edge.into())
+
+        Ok(GqlMutableEdge::new(edge))
     }
 
-    /// Add temporal properties to graph.
+    /// Add temporal properties to the graph itself (not a node or edge). Each
+    /// call records a property update at `t`.
+
     async fn add_properties(
         &self,
-        t: i64,
-        properties: Vec<GqlPropertyInput>,
+        #[graphql(desc = "Time of the update.")] t: GqlTimeInput,
+        #[graphql(desc = "List of `{key, value}` pairs to set.")] properties: Vec<GqlPropertyInput>,
     ) -> Result {
         let self_clone = self.clone();
-        blocking_write(move || {
+        let result = blocking_write(move || {
             self_clone
                 .graph
-                .add_properties(t, as_properties(properties)?)?;
-            self_clone.graph.write_updates()?;
+                .add_properties(t.into_input_time(), as_properties(properties)?)?;
             Ok(true)
         })
-        .await
+        .await;
+
+        self.post_mutation_ops().await;
+
+        result
     }
 
-    /// Add metadata to graph (errors if the property already exists).
-    async fn add_metadata(&self, properties: Vec<GqlPropertyInput>) -> Result {
+    /// Add metadata to the graph itself. Errors if any of the keys already
+    /// exists — use `updateMetadata` to overwrite.
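+    ///
+    /// A hypothetical mutation sketch (the `{key, value}` input shape follows
+    /// `GqlPropertyInput`; the exact `value` encoding is an assumption):
+    /// ```graphql
+    /// mutation {
+    ///   updateGraph(path: "my_graph") {
+    ///     # fails if the "source" key already exists
+    ///     addMetadata(properties: [{ key: "source", value: "ingest-v2" }])
+    ///   }
+    /// }
+    /// ```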
+
+    async fn add_metadata(
+        &self,
+        #[graphql(desc = "List of `{key, value}` pairs to set as metadata.")] properties: Vec<
+            GqlPropertyInput,
+        >,
+    ) -> Result {
         let self_clone = self.clone();
-        blocking_write(move || {
+        let result = blocking_write(move || {
             self_clone.graph.add_metadata(as_properties(properties)?)?;
-            self_clone.graph.write_updates()?;
             Ok(true)
         })
-        .await
+        .await;
+        self.post_mutation_ops().await;
+
+        result
     }
 
-    /// Update metadata of the graph (overwrites existing values).
-    async fn update_metadata(&self, properties: Vec<GqlPropertyInput>) -> Result {
+    /// Update metadata of the graph itself, overwriting any existing values for
+    /// the given keys.
+
+    async fn update_metadata(
+        &self,
+        #[graphql(desc = "List of `{key, value}` pairs to upsert.")] properties: Vec<
+            GqlPropertyInput,
+        >,
+    ) -> Result {
         let self_clone = self.clone();
-        blocking_write(move || {
+        let result = blocking_write(move || {
             self_clone
                 .graph
                 .update_metadata(as_properties(properties)?)?;
-            self_clone.graph.write_updates()?;
             Ok(true)
         })
-        .await
+        .await;
+
+        self.post_mutation_ops().await;
+
+        result
     }
 }
 
 impl GqlMutableGraph {
-    fn get_node_view(&self, name: &str) -> Result<NodeView<'static, GraphWithVectors>, GraphError> {
+    fn get_node_view(
+        &self,
+        name: &GqlNodeId,
+    ) -> Result<NodeView<'static, GraphWithVectors>, GraphError> {
         self.graph
             .node(name)
-            .ok_or_else(|| GraphError::NodeMissingError(GID::Str(name.to_owned())))
+            .ok_or_else(|| GraphError::NodeMissingError(name.0.clone()))
     }
 
     fn get_edge_view(
         &self,
-        src: String,
-        dst: String,
+        src: &GqlNodeId,
+        dst: &GqlNodeId,
     ) -> Result<EdgeView<GraphWithVectors>, GraphError> {
         self.graph
-            .edge(src.clone(), dst.clone())
-            .ok_or(GraphError::EdgeMissingError {
-                src: GID::Str(src),
-                dst: GID::Str(dst),
+            .edge(src, dst)
+            .ok_or_else(|| GraphError::EdgeMissingError {
+                src: src.0.clone(),
+                dst: dst.0.clone(),
             })
     }
+
+    /// Post mutation operations.
+    async fn post_mutation_ops(&self) {
+        self.graph.set_dirty(true);
+    }
 }
 
+/// Write-side handle for a single node — returned from `addNode`, `createNode`,
+/// or `MutableGraph.node`. Supports adding updates, setting node type, and
+/// attaching or updating metadata.
 #[derive(ResolvedObject, Clone)]
 #[graphql(name = "MutableNode")]
 pub struct GqlMutableNode {
     node: NodeView<'static, GraphWithVectors>,
 }
 
-impl From<NodeView<'static, GraphWithVectors>> for GqlMutableNode {
-    fn from(node: NodeView<'static, GraphWithVectors>) -> Self {
+impl GqlMutableNode {
+    pub fn new(node: NodeView<'static, GraphWithVectors>) -> Self {
         Self { node }
     }
 }
@@ -412,77 +576,116 @@ impl GqlMutableNode {
         self.node.clone().into()
     }
 
-    /// Add metadata to the node (errors if the property already exists).
-    async fn add_metadata(&self, properties: Vec<GqlPropertyInput>) -> Result {
+    /// Add metadata to this node. Errors if any of the keys already exists —
+    /// use `updateMetadata` to overwrite.
+
+    async fn add_metadata(
+        &self,
+        #[graphql(desc = "List of `{key, value}` pairs to set as metadata.")] properties: Vec<
+            GqlPropertyInput,
+        >,
+    ) -> Result {
         let self_clone = self.clone();
         blocking_write(move || {
             self_clone.node.add_metadata(as_properties(properties)?)?;
-            self_clone.node.graph.write_updates()?;
             Ok::<_, GraphError>(())
         })
         .await?;
-        let _ = self.node.update_embeddings().await;
+
+        self.post_mutation_ops().await;
+
         Ok(true)
     }
 
-    /// Set the node type (errors if the node already has a non-default type).
-    async fn set_node_type(&self, new_type: String) -> Result {
+    /// Set this node's type. Errors if the node already has a non-default
+    /// type and you're trying to change it.
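+    ///
+    /// A hypothetical mutation sketch reaching the node through `updateGraph`:
+    /// ```graphql
+    /// mutation {
+    ///   updateGraph(path: "my_graph") {
+    ///     node(name: "alice") {
+    ///       setNodeType(newType: "person")
+    ///     }
+    ///   }
+    /// }
+    /// ```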
+ + async fn set_node_type( + &self, + #[graphql(desc = "Node-type name to assign.")] new_type: String, + ) -> Result { let self_clone = self.clone(); blocking_write(move || { self_clone.node.set_node_type(&new_type)?; - - self_clone.node.graph.write_updates()?; Ok::<_, GraphError>(()) }) .await?; - let _ = self.node.update_embeddings().await; + + self.post_mutation_ops().await; + Ok(true) } - /// Update metadata of the node (overwrites existing property values). - async fn update_metadata(&self, properties: Vec) -> Result { + /// Update metadata of this node, overwriting any existing values for the + /// given keys. + + async fn update_metadata( + &self, + #[graphql(desc = "List of `{key, value}` pairs to upsert.")] properties: Vec< + GqlPropertyInput, + >, + ) -> Result { let self_clone = self.clone(); blocking_write(move || { self_clone .node .update_metadata(as_properties(properties)?)?; - self_clone.node.graph.write_updates()?; Ok::<_, GraphError>(()) }) .await?; - let _ = self.node.update_embeddings().await; + + self.post_mutation_ops().await; + Ok(true) } - /// Add temporal property updates to the node. + /// Append a property update to this node at a specific time. + async fn add_updates( &self, - time: i64, + #[graphql(desc = "Time of the update.")] time: GqlTimeInput, + #[graphql(desc = "Optional `{key, value}` pairs attached to the event.")] properties: Option>, + #[graphql(desc = "Optional layer name. If omitted, the default layer is used.")] + layer: Option, ) -> Result { let self_clone = self.clone(); blocking_write(move || { - self_clone - .node - .add_updates(time, as_properties(properties.unwrap_or(vec![]))?)?; - self_clone.node.graph.write_updates()?; + self_clone.node.add_updates( + time.into_input_time(), + as_properties(properties.unwrap_or(vec![]))?, + layer.as_str(), + )?; Ok::<_, GraphError>(()) }) .await?; + + self.post_mutation_ops().await; let _ = self.node.update_embeddings().await; + Ok(true) } } +impl GqlMutableNode { + /// Post mutation operations. + async fn post_mutation_ops(&self) { + self.node.graph.set_dirty(true); + } +} + +/// Write-side handle for a single edge — returned from `addEdge` or +/// `MutableGraph.edge`. Supports adding updates, deletions, and attaching +/// or updating metadata. #[derive(ResolvedObject, Clone)] #[graphql(name = "MutableEdge")] pub struct GqlMutableEdge { edge: EdgeView, } -impl From> for GqlMutableEdge { - fn from(edge: EdgeView) -> Self { +impl GqlMutableEdge { + pub fn new(edge: EdgeView) -> Self { Self { edge } } } @@ -501,36 +704,53 @@ impl GqlMutableEdge { /// Get the mutable source node of the edge. async fn src(&self) -> GqlMutableNode { - self.edge.src().into() + GqlMutableNode::new(self.edge.src()) } /// Get the mutable destination node of the edge. async fn dst(&self) -> GqlMutableNode { - self.edge.dst().into() + GqlMutableNode::new(self.edge.dst()) } - /// Mark the edge as deleted at time time. - async fn delete(&self, time: i64, layer: Option) -> Result { + /// Mark this edge as deleted at the given time. Persistent graphs treat this + /// as a tombstone (the edge becomes invalid from `time` onwards); event + /// graphs simply log the deletion event. + + async fn delete( + &self, + #[graphql(desc = "Time of the deletion.")] time: GqlTimeInput, + #[graphql( + desc = "Optional layer name. If omitted, uses the layer the edge was originally added on (when called after `addEdge`)." 
+ )] + layer: Option, + ) -> Result { let self_clone = self.clone(); blocking_write(move || { - self_clone.edge.delete(time, layer.as_str())?; - - self_clone.edge.graph.write_updates()?; + self_clone + .edge + .delete(time.into_input_time(), layer.as_str())?; Ok::<_, GraphError>(()) }) .await?; + + self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; + Ok(true) } - /// Add metadata to the edge (errors if the value already exists). - /// - /// If this is called after add_edge, the layer is inherited from the add_edge and does not - /// need to be specified again. + /// Add metadata to this edge. Errors if any of the keys already exists — + /// use `updateMetadata` to overwrite. If this is called after `addEdge`, + /// the layer is inherited and does not need to be specified again. + async fn add_metadata( &self, - properties: Vec, - layer: Option, + #[graphql(desc = "List of `{key, value}` pairs to set as metadata.")] properties: Vec< + GqlPropertyInput, + >, + #[graphql(desc = "Optional layer name; defaults to the inherited layer.")] layer: Option< + String, + >, ) -> Result { let self_clone = self.clone(); blocking_write(move || { @@ -538,22 +758,28 @@ impl GqlMutableEdge { .edge .add_metadata(as_properties(properties)?, layer.as_str())?; - self_clone.edge.graph.write_updates()?; Ok::<_, GraphError>(()) }) .await?; + + self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; + Ok(true) } - /// Update metadata of the edge (existing values are overwritten). - /// - /// If this is called after add_edge, the layer is inherited from the add_edge and does not - /// need to be specified again. + /// Update metadata of this edge, overwriting any existing values for the + /// given keys. If this is called after `addEdge`, the layer is inherited + /// and does not need to be specified again. + async fn update_metadata( &self, - properties: Vec, - layer: Option, + #[graphql(desc = "List of `{key, value}` pairs to upsert.")] properties: Vec< + GqlPropertyInput, + >, + #[graphql(desc = "Optional layer name; defaults to the inherited layer.")] layer: Option< + String, + >, ) -> Result { let self_clone = self.clone(); blocking_write(move || { @@ -561,53 +787,65 @@ impl GqlMutableEdge { .edge .update_metadata(as_properties(properties)?, layer.as_str())?; - self_clone.edge.graph.write_updates()?; Ok::<_, GraphError>(()) }) .await?; + + self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; + Ok(true) } - /// Add temporal property updates to the edge. - /// - /// If this is called after add_edge, the layer is inherited from the add_edge and does not - /// need to be specified again. + /// Append a property update to this edge at a specific time. If called + /// after `addEdge`, the layer is inherited and does not need to be + /// specified again. 
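+    ///
+    /// A minimal sketch (the `updateGraph` root field, the `edge` accessor and
+    /// the `f64` value variant are illustrative):
+    ///
+    /// ```text
+    /// mutation {
+    ///   updateGraph(path: "team/project/graph") {
+    ///     edge(src: "alice", dst: "bob") {
+    ///       addUpdates(time: 100, properties: [{ key: "weight", value: { f64: 0.5 } }])
+    ///     }
+    ///   }
+    /// }
+    /// ```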
+ async fn add_updates( &self, - time: i64, + #[graphql(desc = "Time of the update.")] time: GqlTimeInput, + #[graphql(desc = "Optional `{key, value}` pairs attached to the event.")] properties: Option>, - layer: Option, + #[graphql(desc = "Optional layer name; defaults to the inherited layer.")] layer: Option< + String, + >, ) -> Result { let self_clone = self.clone(); blocking_write(move || { self_clone.edge.add_updates( - time, + time.into_input_time(), as_properties(properties.unwrap_or(vec![]))?, layer.as_str(), )?; - self_clone.edge.graph.write_updates()?; + Ok::<_, GraphError>(()) }) .await?; + + self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; + Ok(true) } } +impl GqlMutableEdge { + /// Post mutation operations. + async fn post_mutation_ops(&self) { + self.edge.graph.set_dirty(true); + } +} + #[cfg(test)] mod tests { use super::*; - use crate::{config::app_config::AppConfig, data::Data}; - use itertools::Itertools; + use crate::{config::app_config::AppConfig, data::Data, paths::ExistingGraphFolder}; use raphtory::{ - db::api::view::MaterializedGraph, + db::api::{storage::storage::Config, view::MaterializedGraph}, vectors::{ custom::{serve_custom_embedding, EmbeddingServer}, - embeddings::EmbeddingResult, storage::OpenAIEmbeddings, template::DocumentTemplate, - Embedding, }, }; use tempfile::tempdir; @@ -623,17 +861,20 @@ mod tests { async fn create_mutable_graph( port: u16, - ) -> (GqlMutableGraph, tempfile::TempDir, EmbeddingServer) { + ) -> (GqlMutableGraph, Data, tempfile::TempDir, EmbeddingServer) { let graph = create_test_graph(); let tmp_dir = tempdir().unwrap(); let config = AppConfig::default(); - let data = Data::new(tmp_dir.path(), &config); + let data = Data::new(tmp_dir.path(), &config, Config::default()); let graph_name = "test_graph"; - data.insert_graph(graph_name, graph).await.unwrap(); - + let overwrite = false; + let folder = data + .validate_path_for_insert(graph_name, overwrite) + .unwrap(); + data.insert_graph(folder.clone(), graph).await.unwrap(); let template = DocumentTemplate { node_template: Some("{{ name }} is a {{ node_type }}".to_string()), edge_template: Some("{{ src.name }} appeared with {{ dst.name}}".to_string()), @@ -652,15 +893,15 @@ mod tests { .await .unwrap(); - let (graph_with_vectors, path) = data.get_graph(graph_name).await.unwrap(); - let mutable_graph = GqlMutableGraph::new(path, graph_with_vectors); + let graph_with_vectors = data.get_graph(graph_name).await.unwrap(); + let mutable_graph = GqlMutableGraph::from(graph_with_vectors); - (mutable_graph, tmp_dir, embedding_server) + (mutable_graph, data, tmp_dir, embedding_server) } #[tokio::test] async fn test_add_nodes_empty_list() { - let (mutable_graph, _tmp_dir, embedding_server) = create_mutable_graph(1745).await; + let (mutable_graph, _data, _tmp_dir, embedding_server) = create_mutable_graph(1745).await; let nodes = vec![]; let result = mutable_graph.add_nodes(nodes).await; @@ -671,27 +912,30 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_nodes_simple() { - let (mutable_graph, _tmp_dir, es) = create_mutable_graph(1746).await; + let (mutable_graph, _data, _tmp_dir, es) = create_mutable_graph(1746).await; let nodes = vec![ NodeAddition { - name: "node1".to_string(), + name: "node1".into(), node_type: Some("test_node_type".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), + layer: None, }, NodeAddition { - name: "node2".to_string(), + name: "node2".into(), 
node_type: Some("test_node_type".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), + layer: None, }, ]; @@ -716,12 +960,13 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_nodes_with_properties() { - let (mutable_graph, _tmp_dir, es) = create_mutable_graph(1747).await; + let (mutable_graph, _data, _tmp_dir, es) = create_mutable_graph(1747).await; let nodes = vec![ NodeAddition { - name: "complex_node_1".to_string(), + name: "complex_node_1".into(), node_type: Some("employee".to_string()), metadata: Some(vec![GqlPropertyInput { key: "department".to_string(), @@ -729,41 +974,44 @@ mod tests { }]), updates: Some(vec![ TemporalPropertyInput { - time: 0, + time: 0.into(), properties: Some(vec![GqlPropertyInput { key: "salary".to_string(), value: Value::F64(50000.0), }]), }, TemporalPropertyInput { - time: 0, + time: 0.into(), properties: Some(vec![GqlPropertyInput { key: "salary".to_string(), value: Value::F64(55000.0), }]), }, ]), + layer: None, }, NodeAddition { - name: "complex_node_2".to_string(), + name: "complex_node_2".into(), node_type: Some("employee".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), + layer: None, }, NodeAddition { - name: "complex_node_3".to_string(), + name: "complex_node_3".into(), node_type: Some("employee".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: Some(vec![GqlPropertyInput { key: "salary".to_string(), value: Value::F64(55000.0), }]), }]), + layer: None, }, ]; @@ -788,28 +1036,31 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_edges_simple() { - let (mutable_graph, _tmp_dir, es) = create_mutable_graph(1748).await; + let (mutable_graph, _data, _tmp_dir, es) = create_mutable_graph(1748).await; // First add some nodes. let nodes = vec![ NodeAddition { - name: "node1".to_string(), + name: "node1".into(), node_type: Some("person".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), + layer: None, }, NodeAddition { - name: "node2".to_string(), + name: "node2".into(), node_type: Some("person".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), + layer: None, }, ]; @@ -819,25 +1070,25 @@ mod tests { // Now add edges between them. let edges = vec![ EdgeAddition { - src: "node1".to_string(), - dst: "node2".to_string(), + src: "node1".into(), + dst: "node2".into(), layer: Some("friendship".to_string()), metadata: Some(vec![GqlPropertyInput { key: "strength".to_string(), value: Value::F64(0.8), }]), updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), }, EdgeAddition { - src: "node2".to_string(), - dst: "node1".to_string(), + src: "node2".into(), + dst: "node1".into(), layer: Some("friendship".to_string()), metadata: None, updates: Some(vec![TemporalPropertyInput { - time: 0, + time: 0.into(), properties: None, }]), }, @@ -848,6 +1099,7 @@ mod tests { assert!(result.is_ok()); assert!(result.unwrap()); + // TODO: #2380 (embeddings aren't working right now) // Test that edge embeddings were generated. 
let embedding = fake_embedding("node1 appeared with node2"); let limit = 5; diff --git a/raphtory-graphql/src/model/graph/namespace.rs b/raphtory-graphql/src/model/graph/namespace.rs index b6001a2651..cdf13fb9bf 100644 --- a/raphtory-graphql/src/model/graph/namespace.rs +++ b/raphtory-graphql/src/model/graph/namespace.rs @@ -1,134 +1,252 @@ use crate::{ - data::get_relative_path, + auth_policy::{AuthorizationPolicy, NamespacePermission}, + data::{get_relative_path, Data}, model::graph::{ collection::GqlCollection, meta_graph::MetaGraph, namespaced_item::NamespacedItem, }, - paths::{valid_path, ExistingGraphFolder}, + paths::{ExistingGraphFolder, PathValidationError, ValidPath}, rayon::blocking_compute, }; +use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use itertools::Itertools; -use raphtory::errors::InvalidPathReason; -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; use walkdir::WalkDir; +/// A directory-like container for graphs and nested namespaces. Graphs are +/// addressed by path (e.g. `"team/project/graph"`), and every segment except +/// the last is a namespace. Use to browse what's stored on the server without +/// loading any graph data. #[derive(ResolvedObject, Clone, Ord, Eq, PartialEq, PartialOrd)] pub(crate) struct Namespace { - base_dir: PathBuf, - current_dir: PathBuf, + current_dir: PathBuf, // always validated + relative_path: String, // relative to the root working directory +} + +pub struct NamespaceIter { + it: walkdir::IntoIter, + root: Namespace, +} + +impl Iterator for NamespaceIter { + type Item = NamespacedItem; + + fn next(&mut self) -> Option { + loop { + match self.it.next() { + None => return None, + Some(Ok(entry)) => { + let path = entry.path(); + if path.is_dir() { + match get_relative_path(&self.root.current_dir, path) { + Ok(relative) => { + match self.root.try_new_child(&relative) { + Ok(child) => { + match &child { + NamespacedItem::Namespace(_) => {} + NamespacedItem::MetaGraph(_) => { + self.it.skip_current_dir() // graphs should not be traversed further + } + } + return Some(child); + } + Err(_) => { + self.it.skip_current_dir() // not a valid path + } + } + } + Err(_) => { + self.it.skip_current_dir() // not a valid path and shouldn't be traversed further} + } + } + } + } + _ => {} // skip errors + }; + } + } } impl Namespace { - pub fn new(base_dir: PathBuf, current_dir: PathBuf) -> Self { + pub fn root(root: PathBuf) -> Self { Self { - base_dir, - current_dir, + current_dir: root, + relative_path: "".to_owned(), + } + } + + pub fn try_new(root: PathBuf, relative_path: String) -> Result { + let current_dir = ValidPath::try_new(root, relative_path.as_str())?; + Self::try_from_valid(current_dir, &relative_path) + } + + /// Create a namespace from a valid path if it exists and is a namespace + pub fn try_from_valid( + current_dir: ValidPath, + relative_path: impl Into, + ) -> Result { + if current_dir.is_namespace() { + Ok(Self { + current_dir: current_dir.into_path(), + relative_path: relative_path.into(), + }) + } else { + Err(PathValidationError::NamespaceDoesNotExist( + relative_path.into(), + )) } } - fn get_all_children(&self) -> impl Iterator + use<'_> { + pub fn try_new_child(&self, file_name: &str) -> Result { + let current_dir = ValidPath::try_new(self.current_dir.clone(), file_name)?; + let relative_path = if self.relative_path.is_empty() { + file_name.to_owned() + } else { + [&self.relative_path, file_name].join("/") + }; + let child = if current_dir.is_namespace() { + 
NamespacedItem::Namespace(Self::try_from_valid(current_dir, relative_path)?) + } else { + NamespacedItem::MetaGraph(MetaGraph::new(ExistingGraphFolder::try_from_valid( + current_dir, + &relative_path, + )?)) + }; + Ok(child) + } + + /// Non-recursively list children + pub fn get_children(&self) -> impl Iterator + use<'_> { WalkDir::new(&self.current_dir) + .min_depth(1) .max_depth(1) .into_iter() .flatten() .filter_map(|entry| { let path = entry.path(); - let file_name = entry.file_name().to_str()?; if path.is_dir() { - if path != self.current_dir - && valid_path(self.current_dir.clone(), file_name, true).is_ok() - { - Some(NamespacedItem::Namespace(Namespace::new( - self.base_dir.clone(), - path.to_path_buf(), - ))) - } else { - let base_path = self.base_dir.clone(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; - let folder = - ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; - Some(NamespacedItem::MetaGraph(MetaGraph::new(folder))) - } + let file_name = entry.file_name().to_str()?; + self.try_new_child(file_name).ok() } else { None } }) } - pub(crate) fn get_all_namespaces(&self) -> Vec { - let base_path = self.base_dir.clone(); - WalkDir::new(&self.current_dir) - .into_iter() - .filter_map(|e| { - let entry = e.ok()?; - let path = entry.path(); - if path.is_dir() && get_relative_path(base_path.clone(), path, true).is_ok() { - Some(Namespace::new(self.base_dir.clone(), path.to_path_buf())) - } else { - None - } - }) - .sorted() - .collect() + /// Recursively list all children + pub fn get_all_children(&self) -> impl Iterator { + let it = WalkDir::new(&self.current_dir).into_iter(); + let root = self.clone(); + NamespaceIter { it, root } } } +fn is_graph_visible( + ctx: &Context<'_>, + policy: &Option>, + g: &MetaGraph, +) -> bool { + policy + .as_ref() + .map_or(true, |p| p.graph_permissions(ctx, &g.local_path()).is_ok()) +} + +fn is_namespace_visible( + ctx: &Context<'_>, + policy: &Option>, + n: &Namespace, +) -> bool { + policy.as_ref().map_or(true, |p| { + p.namespace_permissions(ctx, &n.relative_path) >= NamespacePermission::Discover + }) +} + #[ResolvedObjectFields] impl Namespace { - async fn graphs(&self) -> GqlCollection { + /// Graphs directly inside this namespace (excludes graphs in nested + /// namespaces). Filtered by the caller's permissions — only graphs the + /// caller is allowed to see are returned. + async fn graphs(&self, ctx: &Context<'_>) -> GqlCollection { + let data = ctx.data_unchecked::(); let self_clone = self.clone(); - blocking_compute(move || { - GqlCollection::new( - self_clone - .get_all_children() - .into_iter() - .filter_map(|g| match g { - NamespacedItem::MetaGraph(g) => Some(g), - NamespacedItem::Namespace(_) => None, - }) - .sorted() - .collect(), - ) - }) - .await + let items = blocking_compute(move || self_clone.get_children().collect::>()).await; + GqlCollection::new( + items + .into_iter() + .filter_map(|item| match item { + NamespacedItem::MetaGraph(g) + if is_graph_visible(ctx, &data.auth_policy, &g) => + { + Some(g) + } + _ => None, + }) + .sorted() + .collect(), + ) } - async fn path(&self) -> Result { - get_relative_path(self.base_dir.clone(), self.current_dir.as_path(), true) + /// Path of this namespace relative to the root namespace. Empty string for + /// the root namespace itself. + async fn path(&self) -> String { + self.relative_path.clone() } + /// Parent namespace, or null at the root. 
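+    ///
+    /// For example, walking one level of the tree (a sketch; the root
+    /// `namespace` query field and the `list` accessor on collections are
+    /// assumptions here):
+    ///
+    /// ```text
+    /// query {
+    ///   namespace(path: "team") {
+    ///     path
+    ///     parent { path }
+    ///     children { list { path } }
+    ///   }
+    /// }
+    /// ```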
async fn parent(&self) -> Option { - let parent = self.current_dir.parent()?.to_path_buf(); - if parent.starts_with(&self.base_dir) { - Some(Namespace::new(self.base_dir.clone(), parent)) - } else { + if self.relative_path.is_empty() { None + } else { + let parent = self.current_dir.parent()?.to_path_buf(); + let relative_path = self + .relative_path + .rsplit_once("/") + .map_or("", |(parent, _)| parent); + Some(Self { + current_dir: parent, + relative_path: relative_path.to_owned(), + }) } } - async fn children(&self) -> GqlCollection { + /// Sub-namespaces directly inside this one (one level down, not recursive). + /// Filtered by permissions. + async fn children(&self, ctx: &Context<'_>) -> GqlCollection { + let data = ctx.data_unchecked::(); let self_clone = self.clone(); - blocking_compute(move || { - GqlCollection::new( - self_clone - .get_all_children() - .filter_map(|item| match item { - NamespacedItem::MetaGraph(_) => None, - NamespacedItem::Namespace(n) => Some(n), - }) - .sorted() - .collect(), - ) - }) - .await + let items = blocking_compute(move || self_clone.get_children().collect::>()).await; + GqlCollection::new( + items + .into_iter() + .filter_map(|item| match item { + NamespacedItem::Namespace(n) + if is_namespace_visible(ctx, &data.auth_policy, &n) => + { + Some(n) + } + _ => None, + }) + .sorted() + .collect(), + ) } - // Fetch the collection of namespaces/graphs in this namespace. - // Namespaces will be listed before graphs. - async fn items(&self) -> GqlCollection { + /// Everything in this namespace — sub-namespaces and graphs — as a single + /// heterogeneous collection. Sub-namespaces are listed before graphs. + /// Filtered by permissions. + async fn items(&self, ctx: &Context<'_>) -> GqlCollection { + let data = ctx.data_unchecked::(); let self_clone = self.clone(); - blocking_compute(move || { - GqlCollection::new(self_clone.get_all_children().sorted().collect()) - }) - .await + let all_items = + blocking_compute(move || self_clone.get_children().collect::>()).await; + GqlCollection::new( + all_items + .into_iter() + .filter(|item| match item { + NamespacedItem::MetaGraph(g) => is_graph_visible(ctx, &data.auth_policy, g), + NamespacedItem::Namespace(n) => is_namespace_visible(ctx, &data.auth_policy, n), + }) + .sorted() + .collect(), + ) } } diff --git a/raphtory-graphql/src/model/graph/namespaced_item.rs b/raphtory-graphql/src/model/graph/namespaced_item.rs index 1f8e87bb13..8d315eebf7 100644 --- a/raphtory-graphql/src/model/graph/namespaced_item.rs +++ b/raphtory-graphql/src/model/graph/namespaced_item.rs @@ -5,7 +5,7 @@ use dynamic_graphql::Union; // This is useful for when fetching a collection of both for the purposes of displaying all such // items, paged. #[derive(Union, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub(crate) enum NamespacedItem { +pub enum NamespacedItem { /// Namespace. Namespace(Namespace), /// Metagraph. diff --git a/raphtory-graphql/src/model/graph/node.rs b/raphtory-graphql/src/model/graph/node.rs index c21e19c54f..7b429480f5 100644 --- a/raphtory-graphql/src/model/graph/node.rs +++ b/raphtory-graphql/src/model/graph/node.rs @@ -3,6 +3,7 @@ use crate::{ edges::GqlEdges, filtering::{GqlEdgeFilter, GqlNodeFilter, NodeViewCollection}, history::GqlHistory, + node_id::GqlNodeId, nodes::GqlNodes, path_from_node::GqlPathFromNode, property::{GqlMetadata, GqlProperties}, @@ -53,9 +54,10 @@ impl From> for GqlNode /// /// Collections can be filtered and used to create lists. impl GqlNode { - /// Returns the unique id of the node. 
- async fn id(&self) -> String { - self.vv.id().to_string() + /// Returns the unique id of the node — `String` for string-indexed + /// graphs, non-negative `Int` for integer-indexed graphs. + async fn id(&self) -> GqlNodeId { + GqlNodeId(self.vv.id()) } /// Returns the name of the node. @@ -73,24 +75,37 @@ impl GqlNode { } /// Return a view of node containing all layers specified. - async fn layers(&self, names: Vec) -> GqlNode { + + async fn layers( + &self, + #[graphql(desc = "Layer names to include.")] names: Vec, + ) -> GqlNode { let self_clone = self.clone(); blocking_compute(move || self_clone.vv.valid_layers(names).into()).await } /// Returns a collection containing nodes belonging to all layers except the excluded list of layers. - async fn exclude_layers(&self, names: Vec) -> GqlNode { + + async fn exclude_layers( + &self, + #[graphql(desc = "Layer names to exclude.")] names: Vec, + ) -> GqlNode { let self_clone = self.clone(); blocking_compute(move || self_clone.vv.exclude_valid_layers(names).into()).await } /// Returns a collection containing nodes belonging to the specified layer. - async fn layer(&self, name: String) -> GqlNode { + + async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> GqlNode { self.vv.valid_layers(name).into() } /// Returns a collection containing nodes belonging to all layers except the excluded layer. - async fn exclude_layer(&self, name: String) -> GqlNode { + + async fn exclude_layer( + &self, + #[graphql(desc = "Layer name to exclude.")] name: String, + ) -> GqlNode { self.vv.exclude_valid_layers(name).into() } @@ -103,10 +118,20 @@ impl GqlNode { /// e.g. "1 month and 1 day" will align at the start of the day. /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before /// the start of the first window and/or after the end of the last window (i.e. not included in any window). + async fn rolling( &self, + #[graphql( + desc = "Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)." + )] window: WindowDuration, + #[graphql( + desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap." + )] step: Option, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)." + )] alignment_unit: Option, ) -> Result { let window = window.try_into_interval()?; @@ -126,9 +151,16 @@ impl GqlNode { /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment. /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. /// e.g. "1 month and 1 day" will align at the start of the day. + async fn expanding( &self, + #[graphql( + desc = "How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`)." 
+ )] step: WindowDuration, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`." + )] alignment_unit: Option, ) -> Result { let step = step.try_into_interval()?; @@ -141,12 +173,21 @@ impl GqlNode { } /// Create a view of the node including all events between the specified start (inclusive) and end (exclusive). - async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> GqlNode { + + async fn window( + &self, + #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput, + #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput, + ) -> GqlNode { self.vv.window(start.into_time(), end.into_time()).into() } /// Create a view of the node including all events at a specified time. - async fn at(&self, time: GqlTimeInput) -> GqlNode { + + async fn at( + &self, + #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput, + ) -> GqlNode { self.vv.at(time.into_time()).into() } @@ -157,7 +198,11 @@ impl GqlNode { } /// Create a view of the node including all events that are valid at the specified time. - async fn snapshot_at(&self, time: GqlTimeInput) -> GqlNode { + + async fn snapshot_at( + &self, + #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput, + ) -> GqlNode { self.vv.snapshot_at(time.into_time()).into() } @@ -168,29 +213,54 @@ impl GqlNode { } /// Create a view of the node including all events before specified end time (exclusive). - async fn before(&self, time: GqlTimeInput) -> GqlNode { + + async fn before( + &self, + #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput, + ) -> GqlNode { self.vv.before(time.into_time()).into() } /// Create a view of the node including all events after the specified start time (exclusive). - async fn after(&self, time: GqlTimeInput) -> GqlNode { + + async fn after( + &self, + #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput, + ) -> GqlNode { self.vv.after(time.into_time()).into() } /// Shrink a Window to a specified start and end time, if these are earlier and later than the current start and end respectively. - async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn shrink_window( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.vv .shrink_window(start.into_time(), end.into_time()) .into() } /// Set the start of the window to the larger of a specified start time and self.start(). - async fn shrink_start(&self, start: GqlTimeInput) -> Self { + + async fn shrink_start( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + ) -> Self { self.vv.shrink_start(start.into_time()).into() } /// Set the end of the window to the smaller of a specified end and self.end(). 
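+    ///
+    /// For example, tightening an existing window (a sketch; times are plain
+    /// epoch-millisecond integers and `graph`/`node` are the usual query
+    /// accessors):
+    ///
+    /// ```text
+    /// query {
+    ///   graph(path: "team/project/graph") {
+    ///     node(name: "alice") {
+    ///       window(start: 0, end: 1000) {
+    ///         shrinkEnd(end: 500) { name }
+    ///       }
+    ///     }
+    ///   }
+    /// }
+    /// ```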
-    async fn shrink_end(&self, end: GqlTimeInput) -> Self {
+
+    async fn shrink_end(
+        &self,
+        #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")]
+        end: GqlTimeInput,
+    ) -> Self {
         self.vv.shrink_end(end.into_time()).into()
     }
diff --git a/raphtory-graphql/src/model/graph/node_id.rs b/raphtory-graphql/src/model/graph/node_id.rs
new file mode 100644
index 0000000000..911e779aa8
--- /dev/null
+++ b/raphtory-graphql/src/model/graph/node_id.rs
@@ -0,0 +1,76 @@
+use async_graphql::{Error, Value as GqlValue};
+use dynamic_graphql::{Scalar, ScalarValue};
+use raphtory::core::entities::nodes::node_ref::{AsNodeRef, NodeRef};
+use raphtory_api::core::entities::GID;
+use serde::{Deserialize, Serialize};
+use serde_json::Number;
+
+/// Identifier for a node — either a string (`"alice"`) or a non-negative
+/// integer (`42`). Use whichever form matches how the graph was indexed
+/// when nodes were added.
+#[derive(Scalar, Clone, Debug, Serialize, Deserialize)]
+#[graphql(name = "NodeId")]
+pub struct GqlNodeId(pub GID);
+
+impl ScalarValue for GqlNodeId {
+    fn from_value(value: GqlValue) -> Result<Self, Error> {
+        match value {
+            GqlValue::String(s) => Ok(GqlNodeId(GID::Str(s))),
+            GqlValue::Number(n) => n
+                .as_u64()
+                .map(|u| GqlNodeId(GID::U64(u)))
+                .ok_or_else(|| Error::new("NodeId integer must be a non-negative Int.")),
+            _ => Err(Error::new(
+                "Expected NodeId as a String or non-negative Int.",
+            )),
+        }
+    }
+
+    fn to_value(&self) -> GqlValue {
+        match &self.0 {
+            GID::Str(s) => GqlValue::String(s.clone()),
+            GID::U64(u) => GqlValue::Number(Number::from(*u)),
+        }
+    }
+}
+
+impl From<GqlNodeId> for GID {
+    fn from(value: GqlNodeId) -> GID {
+        value.0
+    }
+}
+
+impl From<&str> for GqlNodeId {
+    fn from(value: &str) -> Self {
+        GqlNodeId(GID::Str(value.to_owned()))
+    }
+}
+
+impl From<String> for GqlNodeId {
+    fn from(value: String) -> Self {
+        GqlNodeId(GID::Str(value))
+    }
+}
+
+impl From<u64> for GqlNodeId {
+    fn from(value: u64) -> Self {
+        GqlNodeId(GID::U64(value))
+    }
+}
+
+impl AsNodeRef for GqlNodeId {
+    fn as_node_ref(&self) -> NodeRef<'_> {
+        self.0.as_node_ref()
+    }
+}
+
+impl GqlNodeId {
+    /// Returns the id as a `String`. Integer ids are formatted as decimal.
+    /// Useful for callers that need a string id.
+    pub fn to_string(&self) -> String {
+        match &self.0 {
+            GID::Str(s) => s.clone(),
+            GID::U64(u) => u.to_string(),
+        }
+    }
+}
diff --git a/raphtory-graphql/src/model/graph/nodes.rs b/raphtory-graphql/src/model/graph/nodes.rs
index 82fa5a5077..515f6a0c12 100644
--- a/raphtory-graphql/src/model/graph/nodes.rs
+++ b/raphtory-graphql/src/model/graph/nodes.rs
@@ -1,6 +1,7 @@
 use crate::{
     model::{
         graph::{
+            collection::{check_list_allowed, check_page_limit},
             filtering::{GqlNodeFilter, NodesViewCollection},
             node::GqlNode,
             timeindex::{GqlEventTime, GqlTimeInput},
@@ -11,6 +12,7 @@ use crate::{
     },
     rayon::blocking_compute,
 };
+use async_graphql::{Context, Result};
 use dynamic_graphql::{ResolvedObject, ResolvedObjectFields};
 use itertools::Itertools;
 use raphtory::{
@@ -31,6 +33,9 @@ use raphtory::{
 use raphtory_api::core::{entities::VID, utils::time::IntoTime};
 use std::cmp::Ordering;
 
+/// A lazy collection of nodes from a graph view. Supports all the same view
+/// transforms as `Graph` (window, layer, filter, ...) plus pagination and
+/// sorting. Iterated via `list` / `page` / `ids` / `count`.
 #[derive(ResolvedObject, Clone)]
 #[graphql(name = "Nodes")]
 pub(crate) struct GqlNodes {
@@ -68,23 +73,36 @@ impl GqlNodes {
     }
 
     /// Return a view of the nodes containing all layers specified.
- async fn layers(&self, names: Vec) -> Self { + + async fn layers( + &self, + #[graphql(desc = "Layer names to include.")] names: Vec, + ) -> Self { self.update(self.nn.valid_layers(names)) } /// Return a view of the nodes containing all layers except those specified. - async fn exclude_layers(&self, names: Vec) -> Self { + + async fn exclude_layers( + &self, + #[graphql(desc = "Layer names to exclude.")] names: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.nn.exclude_valid_layers(names))).await } /// Return a view of the nodes containing the specified layer. - async fn layer(&self, name: String) -> Self { + + async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> Self { self.update(self.nn.valid_layers(name)) } /// Return a view of the nodes containing all layers except those specified. - async fn exclude_layer(&self, name: String) -> Self { + + async fn exclude_layer( + &self, + #[graphql(desc = "Layer name to exclude.")] name: String, + ) -> Self { self.update(self.nn.exclude_valid_layers(name)) } @@ -97,10 +115,20 @@ impl GqlNodes { /// e.g. "1 month and 1 day" will align at the start of the day. /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before /// the start of the first window and/or after the end of the last window (i.e. not included in any window). + async fn rolling( &self, + #[graphql( + desc = "Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)." + )] window: WindowDuration, + #[graphql( + desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap." + )] step: Option, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)." + )] alignment_unit: Option, ) -> Result { let window = window.try_into_interval()?; @@ -120,9 +148,16 @@ impl GqlNodes { /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment. /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. /// e.g. "1 month and 1 day" will align at the start of the day. + async fn expanding( &self, + #[graphql( + desc = "How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`)." + )] step: WindowDuration, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`." + )] alignment_unit: Option, ) -> Result { let step = step.try_into_interval()?; @@ -135,12 +170,21 @@ impl GqlNodes { } /// Create a view of the node including all events between the specified start (inclusive) and end (exclusive). 
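+    ///
+    /// For example, counting the nodes active in a one-day window (a sketch;
+    /// times are plain epoch-millisecond integers and `graph(path: ...)` is
+    /// the assumed query root):
+    ///
+    /// ```text
+    /// query {
+    ///   graph(path: "team/project/graph") {
+    ///     nodes {
+    ///       window(start: 0, end: 86400000) { count }
+    ///     }
+    ///   }
+    /// }
+    /// ```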
- async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn window( + &self, + #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput, + #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput, + ) -> Self { self.update(self.nn.window(start.into_time(), end.into_time())) } /// Create a view of the nodes including all events at a specified time. - async fn at(&self, time: GqlTimeInput) -> Self { + + async fn at( + &self, + #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput, + ) -> Self { self.update(self.nn.at(time.into_time())) } @@ -151,7 +195,11 @@ impl GqlNodes { } /// Create a view of the nodes including all events that are valid at the specified time. - async fn snapshot_at(&self, time: GqlTimeInput) -> Self { + + async fn snapshot_at( + &self, + #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput, + ) -> Self { self.update(self.nn.snapshot_at(time.into_time())) } @@ -162,37 +210,70 @@ impl GqlNodes { } /// Create a view of the nodes including all events before specified end time (exclusive). - async fn before(&self, time: GqlTimeInput) -> Self { + + async fn before(&self, #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput) -> Self { self.update(self.nn.before(time.into_time())) } /// Create a view of the nodes including all events after the specified start time (exclusive). - async fn after(&self, time: GqlTimeInput) -> Self { + + async fn after(&self, #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput) -> Self { self.update(self.nn.after(time.into_time())) } /// Shrink both the start and end of the window. - async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn shrink_window( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_window(start.into_time(), end.into_time())) } /// Set the start of the window to the larger of a specified start time and self.start(). - async fn shrink_start(&self, start: GqlTimeInput) -> Self { + + async fn shrink_start( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_start(start.into_time())) } /// Set the end of the window to the smaller of a specified end and self.end(). - async fn shrink_end(&self, end: GqlTimeInput) -> Self { + + async fn shrink_end( + &self, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_end(end.into_time())) } /// Filter nodes by node type. - async fn type_filter(&self, node_types: Vec) -> Self { + + async fn type_filter( + &self, + #[graphql(desc = "Node-type names to keep.")] node_types: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.nn.type_filter(&node_types))).await } - async fn apply_views(&self, views: Vec) -> Result { + /// Apply a list of views in the given order and return the resulting nodes + /// collection. Lets callers compose window, layer, filter, and snapshot + /// operations in a single call. + + async fn apply_views( + &self, + #[graphql( + desc = "Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, etc.) 
applied to the running result." + )] + views: Vec, + ) -> Result { let mut return_view: GqlNodes = GqlNodes::new(self.nn.clone()); for view in views { return_view = match view { @@ -248,7 +329,16 @@ impl GqlNodes { //// Sorting //// ///////////////// - async fn sorted(&self, sort_bys: Vec) -> Self { + /// Sort the nodes. Multiple criteria are applied lexicographically (ties on the + /// first key break to the second, etc.). + + async fn sorted( + &self, + #[graphql( + desc = "Ordered list of sort keys. Each entry chooses exactly one of `id` / `time` / `property`, with an optional `reverse: true` to flip order." + )] + sort_bys: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || { let sorted: Index = self_clone @@ -318,6 +408,7 @@ impl GqlNodes { //// List /////// ///////////////// + /// Number of nodes in the current view. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.nn.len()).await @@ -328,33 +419,66 @@ impl GqlNodes { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone.iter().skip(start).take(limit).collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every node in the view. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.iter().collect()).await + Ok(blocking_compute(move || self_clone.iter().collect()).await) } - /// Returns a view of the node ids. - async fn ids(&self) -> Vec { + /// Every node's id (name) as a flat list of strings. Rejected by the server when + /// bulk list endpoints are disabled. + async fn ids(&self, ctx: &Context<'_>) -> Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.nn.iter_unlocked().map(|nn| nn.name()).collect()).await + Ok( + blocking_compute(move || self_clone.nn.iter_unlocked().map(|nn| nn.name()).collect()) + .await, + ) } - /// Returns a filtered view that applies to list down the chain - async fn filter(&self, expr: GqlNodeFilter) -> Result { + /// Narrow the collection to nodes matching `expr`. The filter sticks to the + /// returned view — every subsequent traversal through these nodes (their + /// neighbours, edges, properties) continues to see the filtered scope. + /// + /// Useful when you want one scoping rule to apply across the whole query. + /// E.g. restricting everything to a specific week: + /// + /// ```text + /// nodes { filter(expr: {window: {start: 1234, end: 5678}}) { + /// list { neighbours { list { name } } } # neighbours still windowed + /// } } + /// ``` + /// + /// Contrast with `select`, which applies here and is not carried through. 
+ + async fn filter( + &self, + #[graphql(desc = "Composite node filter (by name, property, type, etc.).")] + expr: GqlNodeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeNodeFilter = expr.try_into()?; @@ -364,8 +488,30 @@ impl GqlNodes { .await } - /// Returns filtered list of nodes - async fn select(&self, expr: GqlNodeFilter) -> Result { + /// Narrow the collection to nodes matching `expr`, but only at this step — + /// subsequent traversals out of these nodes see the unfiltered graph again. + /// + /// Useful when you want different scopes at different hops. E.g. nodes + /// active on Monday, then their neighbours active on Tuesday, then *those* + /// neighbours active on Wednesday: + /// + /// ```text + /// nodes { select(expr: {window: {...monday...}}) { + /// list { neighbours { select(expr: {window: {...tuesday...}}) { + /// list { neighbours { select(expr: {window: {...wednesday...}}) { + /// list { name } + /// } } } + /// } } } + /// } } + /// ``` + /// + /// Contrast with `filter`, which persists the scope through subsequent ops. + + async fn select( + &self, + #[graphql(desc = "Composite node filter (by name, property, type, etc.).")] + expr: GqlNodeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeNodeFilter = expr.try_into()?; diff --git a/raphtory-graphql/src/model/graph/path_from_node.rs b/raphtory-graphql/src/model/graph/path_from_node.rs index e4562fea23..4a41c4a573 100644 --- a/raphtory-graphql/src/model/graph/path_from_node.rs +++ b/raphtory-graphql/src/model/graph/path_from_node.rs @@ -1,5 +1,6 @@ use crate::{ model::graph::{ + collection::{check_list_allowed, check_page_limit}, filtering::{GqlNodeFilter, PathFromNodeViewCollection}, node::GqlNode, timeindex::{GqlEventTime, GqlTimeInput}, @@ -8,6 +9,7 @@ use crate::{ }, rayon::blocking_compute, }; +use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use raphtory::{ core::utils::time::TryIntoInterval, @@ -20,6 +22,10 @@ use raphtory::{ }; use raphtory_api::core::utils::time::IntoTime; +/// A collection of nodes anchored to a source node — the result of traversals +/// like `node.neighbours`, `inNeighbours`, or `outNeighbours`. Supports all +/// the usual view transforms (window, layer, filter, ...) and can be chained +/// to walk further hops. #[derive(ResolvedObject, Clone)] #[graphql(name = "PathFromNode")] pub(crate) struct GqlPathFromNode { @@ -50,24 +56,37 @@ impl GqlPathFromNode { //////////////////////// /// Returns a view of PathFromNode containing the specified layer, errors if the layer does not exist. - async fn layers(&self, names: Vec) -> Self { + + async fn layers( + &self, + #[graphql(desc = "Layer names to include.")] names: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.nn.valid_layers(names))).await } /// Return a view of PathFromNode containing all layers except the specified excluded layers, errors if any of the layers do not exist. - async fn exclude_layers(&self, names: Vec) -> Self { + + async fn exclude_layers( + &self, + #[graphql(desc = "Layer names to exclude.")] names: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.nn.exclude_valid_layers(names))).await } /// Return a view of PathFromNode containing the layer specified layer, errors if the layer does not exist. 
- async fn layer(&self, name: String) -> Self { + + async fn layer(&self, #[graphql(desc = "Layer name to include.")] name: String) -> Self { self.update(self.nn.valid_layers(name)) } /// Return a view of PathFromNode containing all layers except the specified excluded layers, errors if any of the layers do not exist. - async fn exclude_layer(&self, name: String) -> Self { + + async fn exclude_layer( + &self, + #[graphql(desc = "Layer name to exclude.")] name: String, + ) -> Self { self.update(self.nn.exclude_valid_layers(name)) } @@ -80,10 +99,20 @@ impl GqlPathFromNode { /// e.g. "1 month and 1 day" will align at the start of the day. /// Note that passing a step larger than window while alignment_unit is not "Unaligned" may lead to some entries appearing before /// the start of the first window and/or after the end of the last window (i.e. not included in any window). + async fn rolling( &self, + #[graphql( + desc = "Width of each window. Pass either `{epoch: }` for a discrete number of milliseconds (e.g. `{epoch: 1000}` for 1 second), or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}` or `{duration: 2 hours and 30 minutes}`)." + )] window: WindowDuration, + #[graphql( + desc = "Optional gap between the start of one window and the start of the next. Accepts the same `{epoch: }` or `{duration: }` values as `window`. Defaults to `window` — i.e. windows touch end-to-end with no overlap and no gap." + )] step: Option, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step` (or `window` if no step is set)." + )] alignment_unit: Option, ) -> Result { let window = window.try_into_interval()?; @@ -103,9 +132,16 @@ impl GqlPathFromNode { /// alignment_unit optionally aligns the windows to the specified unit. "Unaligned" can be passed for no alignment. /// If unspecified (i.e. by default), alignment is done on the smallest unit of time in the step. /// e.g. "1 month and 1 day" will align at the start of the day. + async fn expanding( &self, + #[graphql( + desc = "How much the window grows by on each step. Pass either `{epoch: }` for a discrete number of milliseconds, or `{duration: }` for a calendar duration (e.g. `{duration: 1 day}`)." + )] step: WindowDuration, + #[graphql( + desc = "Optional anchor for window boundaries — pass `Unaligned` to disable, or one of the unit values (e.g. `Day`, `Hour`, `Minute`) to align edges to that calendar unit. Defaults to the smallest unit present in `step`." + )] alignment_unit: Option, ) -> Result { let step = step.try_into_interval()?; @@ -118,12 +154,21 @@ impl GqlPathFromNode { } /// Create a view of the PathFromNode including all events between a specified start (inclusive) and end (exclusive). - async fn window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn window( + &self, + #[graphql(desc = "Inclusive lower bound.")] start: GqlTimeInput, + #[graphql(desc = "Exclusive upper bound.")] end: GqlTimeInput, + ) -> Self { self.update(self.nn.window(start.into_time(), end.into_time())) } /// Create a view of the PathFromNode including all events at time. 
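+    ///
+    /// For example, pinning a node's neighbours to a single instant (a sketch;
+    /// the time is a plain epoch-millisecond integer):
+    ///
+    /// ```text
+    /// query {
+    ///   graph(path: "team/project/graph") {
+    ///     node(name: "alice") {
+    ///       neighbours { at(time: 1000) { count } }
+    ///     }
+    ///   }
+    /// }
+    /// ```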
- async fn at(&self, time: GqlTimeInput) -> Self { + + async fn at( + &self, + #[graphql(desc = "Instant to pin the view to.")] time: GqlTimeInput, + ) -> Self { self.update(self.nn.at(time.into_time())) } @@ -134,7 +179,11 @@ impl GqlPathFromNode { } /// Create a view of the PathFromNode including all events that are valid at the specified time. - async fn snapshot_at(&self, time: GqlTimeInput) -> Self { + + async fn snapshot_at( + &self, + #[graphql(desc = "Instant at which entities must be valid.")] time: GqlTimeInput, + ) -> Self { self.update(self.nn.snapshot_at(time.into_time())) } @@ -145,32 +194,55 @@ impl GqlPathFromNode { } /// Create a view of the PathFromNode including all events before the specified end (exclusive). - async fn before(&self, time: GqlTimeInput) -> Self { + + async fn before(&self, #[graphql(desc = "Exclusive upper bound.")] time: GqlTimeInput) -> Self { self.update(self.nn.before(time.into_time())) } /// Create a view of the PathFromNode including all events after the specified start (exclusive). - async fn after(&self, time: GqlTimeInput) -> Self { + + async fn after(&self, #[graphql(desc = "Exclusive lower bound.")] time: GqlTimeInput) -> Self { self.update(self.nn.after(time.into_time())) } /// Shrink both the start and end of the window. - async fn shrink_window(&self, start: GqlTimeInput, end: GqlTimeInput) -> Self { + + async fn shrink_window( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_window(start.into_time(), end.into_time())) } /// Set the start of the window to the larger of the specified start and self.start(). - async fn shrink_start(&self, start: GqlTimeInput) -> Self { + + async fn shrink_start( + &self, + #[graphql(desc = "Proposed new start (TimeInput); ignored if it would widen the window.")] + start: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_start(start.into_time())) } /// Set the end of the window to the smaller of the specified end and self.end(). - async fn shrink_end(&self, end: GqlTimeInput) -> Self { + + async fn shrink_end( + &self, + #[graphql(desc = "Proposed new end (TimeInput); ignored if it would widen the window.")] + end: GqlTimeInput, + ) -> Self { self.update(self.nn.shrink_end(end.into_time())) } - /// Filter nodes by type. - async fn type_filter(&self, node_types: Vec) -> Self { + /// Narrow this path to neighbours whose node type is in the given set. + + async fn type_filter( + &self, + #[graphql(desc = "Node types to keep.")] node_types: Vec, + ) -> Self { let self_clone = self.clone(); blocking_compute(move || self_clone.update(self_clone.nn.type_filter(&node_types))).await } @@ -193,6 +265,7 @@ impl GqlPathFromNode { //// List /////// ///////////////// + /// Number of neighbour nodes reachable from the source in this view. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.nn.len()).await @@ -203,34 +276,50 @@ impl GqlPathFromNode { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. 
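+    ///
+    /// For example, fetching the second page of five neighbours (a sketch):
+    ///
+    /// ```text
+    /// query {
+    ///   graph(path: "team/project/graph") {
+    ///     node(name: "alice") {
+    ///       neighbours { page(limit: 5, pageIndex: 1) { name } }
+    ///     }
+    ///   }
+    /// }
+    /// ```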
+ async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone.iter().skip(start).take(limit).collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every neighbour node in the path. Rejected by the server when + /// bulk list endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.iter().collect()).await + Ok(blocking_compute(move || self_clone.iter().collect()).await) } - /// Returns the node ids. - async fn ids(&self) -> Vec { + /// Every neighbour node's id (name) as a flat list of strings. Rejected by the + /// server when bulk list endpoints are disabled. + async fn ids(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.nn.name().collect()).await + Ok(blocking_compute(move || self_clone.nn.name().collect()).await) } /// Takes a specified selection of views and applies them in given order. + async fn apply_views( &self, + #[graphql( + desc = "Ordered list of view operations; each entry is a one-of variant (`window`, `layer`, `filter`, ...) applied to the running result." + )] views: Vec, ) -> Result { let mut return_view: GqlPathFromNode = self.clone(); @@ -276,8 +365,26 @@ impl GqlPathFromNode { Ok(return_view) } - /// Returns a filtered view that applies to list down the chain - async fn filter(&self, expr: GqlNodeFilter) -> Result { + /// Narrow the neighbour set to nodes matching `expr`. The filter sticks to + /// the returned path — every subsequent traversal (further hops, edges, + /// properties) continues to see the filtered scope. + /// + /// Useful when you want one scoping rule to apply across the whole query. + /// E.g. restricting the whole traversal to a specific week: + /// + /// ```text + /// node(name: "A") { neighbours { filter(expr: {window: {...week...}}) { + /// list { neighbours { list { name } } } # further hops still windowed + /// } } } + /// ``` + /// + /// Contrast with `select`, which applies here and is not carried through. + + async fn filter( + &self, + #[graphql(desc = "Composite node filter (by name, property, type, etc.).")] + expr: GqlNodeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeNodeFilter = expr.try_into()?; @@ -287,8 +394,27 @@ impl GqlPathFromNode { .await } - /// Returns filtered list of neighbour nodes - async fn select(&self, expr: GqlNodeFilter) -> Result { + /// Narrow the neighbour set to nodes matching `expr`, but only at this hop + /// — further traversals out of these nodes see the unfiltered graph again. + /// + /// Useful when each hop needs a different scope. E.g. 
neighbours active on + /// Monday, then *their* neighbours active on Tuesday: + /// + /// ```text + /// node(name: "A") { neighbours { select(expr: {window: {...monday...}}) { + /// list { neighbours { select(expr: {window: {...tuesday...}}) { + /// list { name } + /// } } } + /// } } } + /// ``` + /// + /// Contrast with `filter`, which persists the scope through subsequent ops. + + async fn select( + &self, + #[graphql(desc = "Composite node filter (by name, property, type, etc.).")] + expr: GqlNodeFilter, + ) -> Result { let self_clone = self.clone(); blocking_compute(move || { let filter: CompositeNodeFilter = expr.try_into()?; diff --git a/raphtory-graphql/src/model/graph/property.rs b/raphtory-graphql/src/model/graph/property.rs index 22ffb1b264..1153cc2876 100644 --- a/raphtory-graphql/src/model/graph/property.rs +++ b/raphtory-graphql/src/model/graph/property.rs @@ -6,6 +6,7 @@ use crate::{ rayon::blocking_compute, }; use async_graphql::{Error, Name, Value as GqlValue}; +use bigdecimal::BigDecimal; use dynamic_graphql::{ InputObject, OneOfInput, ResolvedObject, ResolvedObjectFields, Scalar, ScalarValue, }; @@ -20,20 +21,25 @@ use raphtory::{ }; use raphtory_api::core::{ entities::properties::prop::{IntoPropMap, Prop}, - storage::{arc_str::ArcStr, timeindex::EventTime}, - utils::time::IntoTime, + storage::{ + arc_str::ArcStr, + timeindex::{AsTime, EventTime}, + }, + utils::time::{IntoTime, TryIntoTime}, }; use rustc_hash::FxHashMap; +use serde::{Deserialize, Serialize}; use serde_json::Number; use std::{ collections::HashMap, convert::TryFrom, fmt, fmt::{Display, Formatter}, + str::FromStr, sync::Arc, }; -#[derive(InputObject, Clone, Debug)] +#[derive(InputObject, Clone, Debug, Serialize, Deserialize)] pub struct ObjectEntry { /// Key. pub key: String, @@ -41,7 +47,8 @@ pub struct ObjectEntry { pub value: Value, } -#[derive(OneOfInput, Clone, Debug)] +#[derive(OneOfInput, Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub enum Value { /// 8 bit unsigned integer. U8(u8), @@ -67,6 +74,12 @@ pub enum Value { List(Vec), /// Object. Object(Vec), + /// Timezone-aware datetime. + DTime(String), + /// Naive datetime (no timezone). + NDTime(String), + /// BigDecimal number (string representation, e.g. "3.14159" or "123e-5"). 
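+    /// As one-of `Value` inputs the three new variants look like this; the
+    /// field casing is an assumption based on the camelCase rename:
+    /// ```text
+    /// { dTime: "2024-01-01T00:00:00Z" }
+    /// { nDTime: "2024-01-01 00:00:00" }
+    /// { decimal: "3.14159" }
+    /// ```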
+ Decimal(String), } impl Display for Value { @@ -93,6 +106,9 @@ impl Display for Value { .join(", "); write!(f, "Object({{{}}})", inner) } + Value::DTime(v) => write!(f, "DTime({})", v), + Value::NDTime(v) => write!(f, "NDTime({})", v), + Value::Decimal(v) => write!(f, "Decimal({})", v), } } } @@ -131,6 +147,24 @@ fn value_to_prop(value: Value) -> Result { .collect::, _>>()?; Ok(Prop::Map(Arc::new(prop_map))) } + Value::DTime(s) => { + let t = s.try_into_time().map_err(GraphError::from)?; + t.dt().map(|dt| Prop::DTime(dt)).map_err(GraphError::from) + } + Value::NDTime(s) => { + let t = s.try_into_time().map_err(GraphError::from)?; + t.dt() + .map(|dt| Prop::NDTime(dt.naive_utc())) + .map_err(GraphError::from) + } + Value::Decimal(s) => { + let bd = BigDecimal::from_str(&s).map_err(|e| GraphError::InvalidProperty { + reason: format!("Invalid Decimal: {e}"), + })?; + Prop::try_from_bd(bd).map_err(|e| GraphError::InvalidProperty { + reason: format!("Decimal too large: {e}"), + }) + } } } @@ -192,7 +226,7 @@ fn prop_to_gql(prop: &Prop) -> GqlValue { .map(|number| GqlValue::Number(number)) .unwrap_or(GqlValue::Null), Prop::Bool(b) => GqlValue::Boolean(*b), - Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(pp)).collect()), + Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(&pp)).collect()), Prop::Map(m) => GqlValue::Object( m.iter() .map(|(k, v)| (Name::new(k.to_string()), prop_to_gql(v))) @@ -200,11 +234,13 @@ fn prop_to_gql(prop: &Prop) -> GqlValue { ), Prop::DTime(t) => GqlValue::Number(t.timestamp_millis().into()), Prop::NDTime(t) => GqlValue::Number(t.and_utc().timestamp_millis().into()), - Prop::Array(a) => GqlValue::List(a.iter_prop().map(|p| prop_to_gql(&p)).collect()), Prop::Decimal(d) => GqlValue::String(d.to_string()), } } +/// A single `(key, value)` property reading at a point in the graph view. +/// The value is exposed both as a typed scalar (`value`) and as a +/// human-readable string (`asString`). #[derive(Clone, ResolvedObject)] #[graphql(name = "Property")] pub(crate) struct GqlProperty { @@ -226,19 +262,28 @@ impl From<(String, Prop)> for GqlProperty { #[ResolvedObjectFields] impl GqlProperty { + /// The property key (name). async fn key(&self) -> String { self.key.clone() } + /// The property value rendered as a human-readable string (e.g. `"10"`, `"hello"`, + /// `"2024-01-01T00:00:00Z"`). For programmatic access use `value`, which returns + /// a typed scalar. async fn as_string(&self) -> String { self.prop.to_string() } + /// The property value as a typed `PropertyOutput` scalar — numbers come back as + /// numbers, booleans as booleans, strings as strings, etc. async fn value(&self) -> GqlPropertyOutputVal { GqlPropertyOutputVal(self.prop.clone()) } } +/// A `(time, value)` pair — the output type of temporal-property accessors +/// that need to report *when* a value was observed (e.g. `min`, `max`, +/// `median`, `orderedDedupe`). #[derive(ResolvedObject, Clone)] #[graphql(name = "PropertyTuple")] pub(crate) struct GqlPropertyTuple { @@ -260,20 +305,29 @@ impl From<(EventTime, Prop)> for GqlPropertyTuple { #[ResolvedObjectFields] impl GqlPropertyTuple { + /// The timestamp at which this value was recorded. async fn time(&self) -> GqlEventTime { self.time.into() } + /// The value rendered as a human-readable string. For programmatic access use + /// `value`, which returns a typed scalar. 
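+    ///
+    /// A reading sketch via the `max` aggregate defined further down:
+    /// ```text
+    /// max { time { datetime } value asString }
+    /// ```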
async fn as_string(&self) -> String { let self_clone = self.clone(); blocking_compute(move || self_clone.prop.to_string()).await } + /// The value as a typed `PropertyOutput` scalar — numbers come back as numbers, + /// booleans as booleans, etc. async fn value(&self) -> GqlPropertyOutputVal { GqlPropertyOutputVal(self.prop.clone()) } } +/// The full timeline of a single property key on one entity. Exposes every +/// update (via `values` / `history` / `orderedDedupe`), point lookups (`at`, +/// `latest`), and aggregates over the timeline (`sum`, `mean`, `min`, `max`, +/// `median`, `count`). #[derive(ResolvedObject, Clone)] #[graphql(name = "TemporalProperty")] pub(crate) struct GqlTemporalProperty { @@ -295,46 +349,71 @@ impl From<(String, TemporalPropertyView)> for GqlTemporalProperty { #[ResolvedObjectFields] impl GqlTemporalProperty { - /// Key of a property. + /// The property key (name). async fn key(&self) -> String { self.key.clone() } + /// Event history for this property — one entry per temporal update, in + /// insertion order. Use this to navigate the full timeline: access the + /// raw `timestamps` / `datetimes` / `eventId` lists, analyse gaps between + /// updates via `intervals` (mean/median/min/max), ask `isEmpty`, or + /// paginate the events. async fn history(&self) -> GqlHistory { let self_clone = self.clone(); blocking_compute(move || self_clone.prop.history().into()).await } - /// Return the values of the properties. - async fn values(&self) -> Vec { + /// All values this property has ever taken, in temporal order (one per update). + /// Typed as `PropertyOutput` so numeric values stay numeric. + async fn values(&self) -> Vec { let self_clone = self.clone(); - blocking_compute(move || self_clone.prop.values().map(|x| x.to_string()).collect()).await + blocking_compute(move || self_clone.prop.values().map(GqlPropertyOutputVal).collect()).await } - async fn at(&self, t: GqlTimeInput) -> Option { + /// The value at or before time `t` (latest update on or before `t`). Returns null + /// if no update exists on or before `t`. + + async fn at( + &self, + #[graphql( + desc = "A TimeInput (epoch millis integer, RFC3339 string, or `{timestamp, eventId}` object)." + )] + t: GqlTimeInput, + ) -> Option { let self_clone = self.clone(); - blocking_compute(move || self_clone.prop.at(t.into_time()).map(|x| x.to_string())).await + blocking_compute(move || self_clone.prop.at(t.into_time()).map(GqlPropertyOutputVal)).await } - async fn latest(&self) -> Option { + /// The most recent value, or null if the property has never been set in this view. + async fn latest(&self) -> Option { let self_clone = self.clone(); - blocking_compute(move || self_clone.prop.latest().map(|x| x.to_string())).await + blocking_compute(move || self_clone.prop.latest().map(GqlPropertyOutputVal)).await } - async fn unique(&self) -> Vec { + /// The set of distinct values this property has ever taken (order not guaranteed). + async fn unique(&self) -> Vec { let self_clone = self.clone(); blocking_compute(move || { self_clone .prop .unique() .into_iter() - .map(|x| x.to_string()) + .map(GqlPropertyOutputVal) .collect_vec() }) .await } - async fn ordered_dedupe(&self, latest_time: bool) -> Vec { + /// Collapses runs of consecutive-equal updates into a single `(time, value)` pair. + + async fn ordered_dedupe( + &self, + #[graphql( + desc = "If true, each run is represented by its *last* timestamp; if false, by its *first*. Useful for compressing chatter in a timeline." 
+ )] + latest_time: bool, + ) -> Vec { let self_clone = self.clone(); blocking_compute(move || { self_clone @@ -346,8 +425,57 @@ impl GqlTemporalProperty { }) .await } + + /// Sum of all updates. Returns null if the dtype is not additive or the property is empty. + async fn sum(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.sum().map(GqlPropertyOutputVal)).await + } + + /// Mean of all updates as an F64. Returns null if any value is non-numeric or the property is + /// empty. + async fn mean(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.mean().map(GqlPropertyOutputVal)).await + } + + /// Alias for `mean` — same F64 average, same null cases. + async fn average(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.average().map(GqlPropertyOutputVal)).await + } + + /// Minimum `(time, value)` pair. Returns null if the dtype is not comparable or the property is + /// empty. + async fn min(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.min().map(GqlPropertyTuple::from)).await + } + + /// Maximum `(time, value)` pair. Returns null if the dtype is not comparable or the property is + /// empty. + async fn max(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.max().map(GqlPropertyTuple::from)).await + } + + /// Median `(time, value)` pair (lower median on even-length inputs). Returns null if the dtype + /// is not comparable or the property is empty. + async fn median(&self) -> Option { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.median().map(GqlPropertyTuple::from)).await + } + + /// Number of updates recorded for this property in the current view. + async fn count(&self) -> usize { + let self_clone = self.clone(); + blocking_compute(move || self_clone.prop.count()).await + } } +/// All temporal properties of an entity (metadata is exposed separately). +/// Look up individual properties via `get` / `contains`, enumerate via +/// `keys` / `values`, or drop into `temporal` for time-aware accessors. #[derive(ResolvedObject, Clone)] #[graphql(name = "Properties")] pub(crate) struct GqlProperties { @@ -369,6 +497,9 @@ impl> From
for GqlProperties { } } +/// The temporal-only view of an entity's properties. Each entry is a +/// `TemporalProperty` carrying the full timeline for that key — use this when +/// you need per-update iteration, time-indexed lookups, or aggregates. #[derive(ResolvedObject, Clone)] #[graphql(name = "TemporalProperties")] pub(crate) struct GqlTemporalProperties { @@ -387,6 +518,10 @@ impl From for GqlTemporalProperties { } } +/// Constant key/value metadata attached to an entity (node, edge, or graph). +/// Metadata has no timeline — each key maps to exactly one value for the +/// lifetime of the entity. Separate from `Properties`, which carries +/// time-varying data. #[derive(ResolvedObject, Clone)] #[graphql(name = "Metadata")] pub(crate) struct GqlMetadata { @@ -407,19 +542,29 @@ impl> From
for GqlMetadata { #[ResolvedObjectFields] impl GqlProperties { - /// Get property value matching the specified key. - async fn get(&self, key: String) -> Option { + /// Look up a single property by key. Returns null if no property with that key + /// exists in the current view. + + async fn get( + &self, + #[graphql(desc = "The property name.")] key: String, + ) -> Option { self.props .get(key.as_str()) .map(|p| (key.to_string(), p).into()) } - /// Check if the key is in the properties. - async fn contains(&self, key: String) -> bool { + /// Returns true if a property with the given key exists in this view. + + async fn contains( + &self, + #[graphql(desc = "The property name to look up.")] key: String, + ) -> bool { self.props.get(&key).is_some() } - /// Return all property keys. + /// All property keys present in the current view. Does not include metadata + /// — metadata is exposed separately via the entity's `metadata` field. async fn keys(&self) -> Vec { let self_clone = self.clone(); blocking_compute(move || { @@ -432,8 +577,15 @@ impl GqlProperties { .await } - /// Return all property values. - async fn values(&self, keys: Option>) -> Vec { + /// Snapshot of property values, one `{key, value}` entry per property. + + async fn values( + &self, + #[graphql( + desc = "Optional whitelist. If provided, only properties with these keys are returned; if omitted or null, every property in the view is returned." + )] + keys: Option>, + ) -> Vec { let self_clone = self.clone(); blocking_compute(move || match keys { Some(keys) => self_clone @@ -457,6 +609,8 @@ impl GqlProperties { .await } + /// The temporal-only view of these properties — excludes metadata (which has no + /// history) and lets you drill into per-key timelines and aggregates. async fn temporal(&self) -> GqlTemporalProperties { self.props.temporal().into() } @@ -464,26 +618,42 @@ impl GqlProperties { #[ResolvedObjectFields] impl GqlMetadata { - /// Get metadata value matching the specified key. - async fn get(&self, key: String) -> Option { + /// Look up a single metadata value by key. Returns null if no metadata with that + /// key exists. + + async fn get( + &self, + #[graphql(desc = "The metadata name.")] key: String, + ) -> Option { self.props .get(key.as_str()) .map(|p| (key.to_string(), p).into()) } - /// /// Check if the key is in the metadata. - async fn contains(&self, key: String) -> bool { + /// Returns true if a metadata entry with the given key exists. + + async fn contains( + &self, + #[graphql(desc = "The metadata name to look up.")] key: String, + ) -> bool { self.props.contains(key.as_str()) } - /// Return all metadata keys. + /// All metadata keys present on this entity. async fn keys(&self) -> Vec { let self_clone = self.clone(); blocking_compute(move || self_clone.props.keys().map(|k| k.clone().into()).collect()).await } - /// /// Return all metadata values. - pub(crate) async fn values(&self, keys: Option>) -> Vec { + /// All metadata values as `{key, value}` entries. + + pub(crate) async fn values( + &self, + #[graphql( + desc = "Optional whitelist. If provided, only metadata with these keys is returned; if omitted, every metadata entry is returned." + )] + keys: Option>, + ) -> Vec { let self_clone = self.clone(); blocking_compute(move || match keys { Some(keys) => self_clone @@ -510,17 +680,26 @@ impl GqlMetadata { #[ResolvedObjectFields] impl GqlTemporalProperties { - /// Get property value matching the specified key. 
- async fn get(&self, key: String) -> Option { + /// Look up a single temporal property by key. Returns null if there's no temporal + /// property with that key. + + async fn get( + &self, + #[graphql(desc = "The property name.")] key: String, + ) -> Option { self.props.get(key.as_str()).map(move |p| (key, p).into()) } - /// Check if the key is in the properties. - async fn contains(&self, key: String) -> bool { + /// Returns true if a temporal property with the given key exists. + + async fn contains( + &self, + #[graphql(desc = "The property name to look up.")] key: String, + ) -> bool { self.props.get(&key).is_some() } - /// Return all property keys. + /// All temporal-property keys present in this view. async fn keys(&self) -> Vec { let self_clone = self.clone(); blocking_compute(move || { @@ -533,8 +712,16 @@ impl GqlTemporalProperties { .await } - /// Return all property values. - async fn values(&self, keys: Option>) -> Vec { + /// All temporal properties, each as a `TemporalProperty` with its full timeline + /// available. Use `history`, `values`, `latest`, `at`, etc. on each entry. + + async fn values( + &self, + #[graphql( + desc = "Optional whitelist. If provided, only temporal properties with these keys are returned; if omitted, every temporal property in the view is returned." + )] + keys: Option>, + ) -> Vec { let self_clone = self.clone(); blocking_compute(move || match keys { Some(keys) => self_clone diff --git a/raphtory-graphql/src/model/graph/timeindex.rs b/raphtory-graphql/src/model/graph/timeindex.rs index 42cc7e713a..fd01768ca9 100644 --- a/raphtory-graphql/src/model/graph/timeindex.rs +++ b/raphtory-graphql/src/model/graph/timeindex.rs @@ -3,16 +3,22 @@ use chrono::format::{Item, StrftimeItems}; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Scalar, ScalarValue}; use raphtory_api::core::{ storage::timeindex::{AsTime, EventTime}, - utils::time::{IntoTime, TryIntoTime}, + utils::time::{InputTime, IntoTime, TryIntoTime}, }; +use serde::{Deserialize, Serialize}; /// Input for primary time component. Expects Int, DateTime formatted String, or Object { timestamp, eventId } /// where the timestamp is either an Int or a DateTime formatted String, and eventId is a non-negative Int. /// Valid string formats are RFC3339, RFC2822, %Y-%m-%d, %Y-%m-%dT%H:%M:%S%.3f, %Y-%m-%dT%H:%M:%S%, /// %Y-%m-%d %H:%M:%S%.3f and %Y-%m-%d %H:%M:%S%. -#[derive(Scalar, Clone, Debug)] +/// +/// Internally wraps `InputTime` so write paths (`addNode`, `addEdge`, +/// `addProperties`, etc.) can preserve auto-increment of `event_id` when only +/// a timestamp is given. Pass the object form `{timestamp, eventId}` to lock +/// the event_id explicitly. 
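+///
+/// The three accepted shapes side by side (timestamps illustrative):
+/// ```text
+/// time: 1704067200000                            # epoch millis; writes auto-increment event_id
+/// time: "2024-01-01T00:00:00Z"                   # RFC3339 string; same auto-increment on writes
+/// time: { timestamp: 1704067200000, eventId: 3 } # object form; event_id locked explicitly
+/// ```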
+#[derive(Scalar, Clone, Debug, Serialize, Deserialize)] #[graphql(name = "TimeInput")] -pub struct GqlTimeInput(pub EventTime); +pub struct GqlTimeInput(pub InputTime); impl ScalarValue for GqlTimeInput { fn from_value(value: GqlValue) -> Result { @@ -22,11 +28,11 @@ impl ScalarValue for GqlTimeInput { .ok_or(Error::new( "Expected Int, DateTime formatted String, or Object { timestamp, eventId }.", )) - .map(|timestamp| GqlTimeInput(EventTime::start(timestamp))), + .map(|timestamp| GqlTimeInput(InputTime::Simple(timestamp))), GqlValue::String(dt) => dt .try_into_time() - .map(|t| GqlTimeInput(t.set_event_id(0))) + .map(|t| GqlTimeInput(InputTime::Simple(t.t()))) .map_err(|e| Error::new(e.to_string())), // TimeInput: Object { timestamp: Number | String, eventId: Number } @@ -61,7 +67,7 @@ impl ScalarValue for GqlTimeInput { _ => return Err(Error::new("eventId must be a non-negative Int.")), }; - Ok(GqlTimeInput(EventTime::new(ts, idx))) + Ok(GqlTimeInput(InputTime::Indexed(ts, idx))) } _ => Err(Error::new( "Expected Int, DateTime formatted String, or Object { timestamp, eventId }.", @@ -70,28 +76,48 @@ impl ScalarValue for GqlTimeInput { } fn to_value(&self) -> GqlValue { - self.0.t().into() + self.t().into() } } impl From for GqlTimeInput { fn from(value: i64) -> Self { - GqlTimeInput(EventTime::start(value)) + GqlTimeInput(InputTime::Simple(value)) + } +} + +impl GqlTimeInput { + /// Extract just the timestamp (for read-side query args like `window`, + /// `at`, `before`, `after`). Auto-increment semantics aren't relevant + /// when only reading. + pub fn t(&self) -> i64 { + match &self.0 { + InputTime::Simple(t) => *t, + InputTime::Indexed(t, _) => *t, + } + } + + /// Pass the underlying `InputTime` straight through to write paths so + /// `Simple` causes the graph to allocate a fresh `event_id` and + /// `Indexed` locks one explicitly. + pub fn into_input_time(self) -> InputTime { + self.0 } } impl IntoTime for GqlTimeInput { + /// Build an `EventTime`. For read-side use only — write paths should call + /// `into_input_time` instead so auto-increment of `event_id` works. fn into_time(self) -> EventTime { - self.0 + match self.0 { + InputTime::Simple(t) => EventTime::start(t), + InputTime::Indexed(t, e) => EventTime::new(t, e), + } } } pub fn dt_format_str_is_valid(fmt_str: &str) -> bool { - if StrftimeItems::new(fmt_str).any(|it| matches!(it, Item::Error)) { - false - } else { - true - } + !StrftimeItems::new(fmt_str).any(|it| matches!(it, Item::Error)) } /// Raphtory’s EventTime. @@ -127,7 +153,14 @@ impl GqlEventTime { /// Defaults to RFC 3339 if not provided (e.g., "2023-12-25T10:30:45.123Z"). /// Refer to chrono::format::strftime for formatting specifiers and escape sequences. /// Raises an error if a time conversion fails. - async fn datetime(&self, format_string: Option) -> Result, Error> { + + async fn datetime( + &self, + #[graphql( + desc = "Optional format string for the rendered datetime. Uses `%`-style specifiers — for example `%Y-%m-%d` for `2024-01-15`, `%Y-%m-%d %H:%M:%S` for `2024-01-15 10:30:00`, or `%H:%M` for `10:30`. Defaults to RFC 3339 (e.g. `2024-01-15T10:30:45.123+00:00`) when omitted." 
+ )] + format_string: Option, + ) -> Result, Error> { let fmt_string = format_string.as_deref().unwrap_or("%+"); // %+ is RFC 3339 if dt_format_str_is_valid(fmt_string) { self.inner diff --git a/raphtory-graphql/src/model/graph/vector_selection.rs b/raphtory-graphql/src/model/graph/vector_selection.rs index 9560d43cff..58e136a75b 100644 --- a/raphtory-graphql/src/model/graph/vector_selection.rs +++ b/raphtory-graphql/src/model/graph/vector_selection.rs @@ -2,6 +2,7 @@ use super::{ document::GqlDocument, edge::GqlEdge, node::GqlNode, + node_id::GqlNodeId, vectorised_graph::{IntoWindowTuple, VectorisedGraphWindow}, }; use crate::rayon::blocking_compute; @@ -14,12 +15,16 @@ use raphtory::{ #[derive(InputObject)] pub(super) struct InputEdge { - /// Source node. - src: String, - /// Destination node. - dst: String, + /// Source node id (string or non-negative integer). + src: GqlNodeId, + /// Destination node id (string or non-negative integer). + dst: GqlNodeId, } +/// A working set of documents / nodes / edges built up via similarity +/// searches on a `VectorisedGraph`. Selections are mutable: you can grow +/// them with more hops (`expand*`), dereference the contents (`nodes`, +/// `edges`, `getDocuments`), or start fresh with `emptySelection`. #[derive(ResolvedObject)] #[graphql(name = "VectorSelection")] pub(crate) struct GqlVectorSelection(VectorSelection); @@ -56,29 +61,45 @@ impl GqlVectorSelection { .collect()) } - /// Adds all the documents associated with the specified nodes to the current selection. - /// - /// Documents added by this call are assumed to have a score of 0. - async fn add_nodes(&self, nodes: Vec) -> Self { + /// Add every document associated with the named nodes to the selection. + /// Documents added this way receive a score of 0 (no similarity ranking). + + async fn add_nodes( + &self, + #[graphql(desc = "Node ids whose documents to include.")] nodes: Vec, + ) -> Self { let mut selection = self.cloned(); selection.add_nodes(nodes); selection.into() } - /// Adds all the documents associated with the specified edges to the current selection. - /// - /// Documents added by this call are assumed to have a score of 0. - async fn add_edges(&self, edges: Vec) -> Self { + /// Add every document associated with the named edges to the selection. + /// Documents added this way receive a score of 0 (no similarity ranking). + + async fn add_edges( + &self, + #[graphql(desc = "List of `{src, dst}` pairs identifying the edges.")] edges: Vec< + InputEdge, + >, + ) -> Self { let mut selection = self.cloned(); let edges = edges.into_iter().map(|edge| (edge.src, edge.dst)).collect(); selection.add_edges(edges); selection.into() } - /// Add all the documents a specified number of hops away to the selection. - /// - /// Two documents A and B are considered to be 1 hop away of each other if they are on the same entity or if they are on the same node and edge pair. - async fn expand(&self, hops: usize, window: Option) -> Self { + /// Grow the selection by including documents that are within `hops` of any + /// document already in the selection. Two documents are 1 hop apart if + /// they're on the same entity or on a connected node/edge pair. + + async fn expand( + &self, + #[graphql(desc = "Number of expansion rounds (1 = direct neighbours).")] hops: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict expansion to entities active in that interval." 
+ )] + window: Option, + ) -> Self { let window = window.into_window_tuple(); let mut selection = self.cloned(); blocking_compute(move || { @@ -88,11 +109,18 @@ impl GqlVectorSelection { .await } - /// Adds documents, from the set of one hop neighbours to the current selection, to the selection based on their similarity score with the specified query. This function loops so that the set of one hop neighbours expands on each loop and number of documents added is determined by the specified limit. + /// Iteratively expand the selection by similarity to a natural-language + /// query. Each pass takes the one-hop neighbour set of the current + /// selection and adds the highest-scoring entities (mixed nodes and + /// edges); the loop continues until `limit` entities have been added. + async fn expand_entities_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Total number of entities to add across all passes.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to entities active in that interval." + )] window: Option, ) -> GraphResult { let vector = self.embed_text(query).await?; @@ -104,11 +132,16 @@ impl GqlVectorSelection { Ok(selection.into()) } - /// Add the adjacent nodes with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to nodes. + /// Like `expandEntitiesBySimilarity` but restricted to nodes — iteratively + /// add the highest-scoring adjacent nodes to the selection. + async fn expand_nodes_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Total number of nodes to add across all passes.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to nodes active in that interval." + )] window: Option, ) -> GraphResult { let vector = self.embed_text(query).await?; @@ -120,11 +153,16 @@ impl GqlVectorSelection { Ok(selection.into()) } - /// Add the adjacent edges with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to edges. + /// Like `expandEntitiesBySimilarity` but restricted to edges — iteratively + /// add the highest-scoring adjacent edges to the selection. + async fn expand_edges_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Total number of edges to add across all passes.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to edges active in that interval." 
+ )] window: Option, ) -> GraphResult { let vector = self.embed_text(query).await?; diff --git a/raphtory-graphql/src/model/graph/vectorised_graph.rs b/raphtory-graphql/src/model/graph/vectorised_graph.rs index 23e4479588..eb455b13f9 100644 --- a/raphtory-graphql/src/model/graph/vectorised_graph.rs +++ b/raphtory-graphql/src/model/graph/vectorised_graph.rs @@ -1,4 +1,4 @@ -use crate::rayon::blocking_compute; +use crate::{model::graph::timeindex::GqlTimeInput, rayon::blocking_compute}; use super::vector_selection::GqlVectorSelection; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; @@ -6,13 +6,14 @@ use raphtory::{ db::api::view::MaterializedGraph, errors::GraphResult, vectors::vectorised_graph::VectorisedGraph, }; +use raphtory_api::core::{storage::timeindex::AsTime, utils::time::IntoTime}; #[derive(InputObject)] pub(super) struct VectorisedGraphWindow { - /// Start time. - start: i64, - /// End time. - end: i64, + /// Inclusive lower bound of the search window. + start: GqlTimeInput, + /// Exclusive upper bound of the search window. + end: GqlTimeInput, } pub(super) trait IntoWindowTuple { @@ -21,10 +22,14 @@ pub(super) trait IntoWindowTuple { impl IntoWindowTuple for Option { fn into_window_tuple(self) -> Option<(i64, i64)> { - self.map(|window| (window.start, window.end)) + self.map(|window| (window.start.into_time().t(), window.end.into_time().t())) } } +/// A graph with embedded vector representations for its nodes and edges. +/// Exposes similarity search over documents, nodes, and edges, plus +/// selection building (`emptySelection`) and index maintenance +/// (`optimizeIndex`). #[derive(ResolvedObject)] #[graphql(name = "VectorisedGraph")] pub(crate) struct GqlVectorisedGraph(VectorisedGraph); @@ -37,7 +42,9 @@ impl From> for GqlVectorisedGraph { #[ResolvedObjectFields] impl GqlVectorisedGraph { - /// Optmize the vector index + /// Rebuild (or incrementally update) the on-disk vector indexes for nodes + /// and edges so subsequent similarity searches hit the fresh embeddings. + /// Safe to call repeatedly; returns true on success. async fn optimize_index(&self) -> GraphResult { self.0.optimize_index().await?; Ok(true) @@ -48,11 +55,17 @@ impl GqlVectorisedGraph { self.0.empty_selection().into() } - /// Search the top scoring entities according to a specified query returning no more than a specified limit of entities. + /// Find the highest-scoring nodes *and* edges (mixed) by similarity to a + /// natural-language query. The query is embedded server-side and matched + /// against indexed entity vectors. + async fn entities_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Maximum number of results to return.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to entities active in that interval." + )] window: Option, ) -> GraphResult { let vector = self.0.embed_text(query).await?; @@ -63,11 +76,17 @@ impl GqlVectorisedGraph { Ok(query.execute().await?.into()) } - /// Search the top scoring nodes according to a specified query returning no more than a specified limit of nodes. + /// Find the highest-scoring nodes by similarity to a natural-language + /// query. The query is embedded server-side and matched against indexed + /// node vectors. 
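+    ///
+    /// A query sketch (the graph path and query text are illustrative):
+    /// ```text
+    /// vectorisedGraph(path: "g") {
+    ///   nodesBySimilarity(query: "account takeover", limit: 10) {
+    ///     nodes { name }
+    ///   }
+    /// }
+    /// ```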
+ async fn nodes_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Maximum number of nodes to return.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to nodes active in that interval." + )] window: Option, ) -> GraphResult { let vector = self.0.embed_text(query).await?; @@ -77,11 +96,17 @@ impl GqlVectorisedGraph { Ok(query.execute().await?.into()) } - /// Search the top scoring edges according to a specified query returning no more than a specified limit of edges. + /// Find the highest-scoring edges by similarity to a natural-language + /// query. The query is embedded server-side and matched against indexed + /// edge vectors. + async fn edges_by_similarity( &self, - query: String, - limit: usize, + #[graphql(desc = "Natural-language search string; embedded by the server.")] query: String, + #[graphql(desc = "Maximum number of edges to return.")] limit: usize, + #[graphql( + desc = "Optional `{start, end}` to restrict matches to edges active in that interval." + )] window: Option, ) -> GraphResult { let vector = self.0.embed_text(query).await?; diff --git a/raphtory-graphql/src/model/graph/windowset.rs b/raphtory-graphql/src/model/graph/windowset.rs index 32b1501029..e118986553 100644 --- a/raphtory-graphql/src/model/graph/windowset.rs +++ b/raphtory-graphql/src/model/graph/windowset.rs @@ -1,11 +1,17 @@ use crate::{ model::graph::{ - edge::GqlEdge, edges::GqlEdges, graph::GqlGraph, node::GqlNode, nodes::GqlNodes, + collection::{check_list_allowed, check_page_limit}, + edge::GqlEdge, + edges::GqlEdges, + graph::GqlGraph, + node::GqlNode, + nodes::GqlNodes, path_from_node::GqlPathFromNode, }, paths::ExistingGraphFolder, rayon::blocking_compute, }; +use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use raphtory::db::{ api::{ @@ -15,6 +21,10 @@ use raphtory::db::{ graph::{edge::EdgeView, edges::Edges, node::NodeView, nodes::Nodes, path::PathFromNode}, }; +/// A lazy sequence of graph snapshots produced by `rolling` or `expanding`. +/// Each entry is a `Graph` at a different window over time. Iterate via +/// `list` / `page` (or count with `count`). Subsequent view ops apply +/// per-window. #[derive(ResolvedObject, Clone)] #[graphql(name = "GraphWindowSet")] pub(crate) struct GqlGraphWindowSet { @@ -29,7 +39,8 @@ impl GqlGraphWindowSet { } #[ResolvedObjectFields] impl GqlGraphWindowSet { - /// Returns the number of items. + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -40,14 +51,21 @@ impl GqlGraphWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." 
+ )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -57,22 +75,28 @@ impl GqlGraphWindowSet { .map(|g| GqlGraph::new(self_clone.path.clone(), g)) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { self_clone .ws .clone() .map(|g| GqlGraph::new(self_clone.path.clone(), g)) .collect() }) - .await + .await) } } +/// A lazy sequence of per-window views of a single node, produced by +/// `node.rolling` / `node.expanding`. Each entry is the node as it exists in +/// that window. #[derive(ResolvedObject, Clone)] #[graphql(name = "NodeWindowSet")] pub(crate) struct GqlNodeWindowSet { @@ -86,6 +110,8 @@ impl GqlNodeWindowSet { } #[ResolvedObjectFields] impl GqlNodeWindowSet { + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -96,14 +122,21 @@ impl GqlNodeWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -113,15 +146,21 @@ impl GqlNodeWindowSet { .map(|n| n.into()) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.ws.clone().map(|n| n.into()).collect()).await + Ok(blocking_compute(move || self_clone.ws.clone().map(|n| n.into()).collect()).await) } } +/// A lazy sequence of per-window node collections, produced by +/// `nodes.rolling` / `nodes.expanding`. Each entry is a `Nodes` collection +/// as it exists in that window. #[derive(ResolvedObject, Clone)] #[graphql(name = "NodesWindowSet")] pub(crate) struct GqlNodesWindowSet { @@ -137,6 +176,8 @@ impl GqlNodesWindowSet { } #[ResolvedObjectFields] impl GqlNodesWindowSet { + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. 
async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -147,14 +188,21 @@ impl GqlNodesWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -164,15 +212,24 @@ impl GqlNodesWindowSet { .map(|n| GqlNodes::new(n)) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.ws.clone().map(|n| GqlNodes::new(n)).collect()).await + Ok( + blocking_compute(move || self_clone.ws.clone().map(|n| GqlNodes::new(n)).collect()) + .await, + ) } } +/// A lazy sequence of per-window neighbour sets, produced by +/// `neighbours.rolling` / `neighbours.expanding` (or the in/out variants). +/// Each entry is a `PathFromNode` scoped to that window. #[derive(ResolvedObject, Clone)] #[graphql(name = "PathFromNodeWindowSet")] pub(crate) struct GqlPathFromNodeWindowSet { @@ -186,6 +243,8 @@ impl GqlPathFromNodeWindowSet { } #[ResolvedObjectFields] impl GqlPathFromNodeWindowSet { + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -196,14 +255,21 @@ impl GqlPathFromNodeWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -213,22 +279,28 @@ impl GqlPathFromNodeWindowSet { .map(|n| GqlPathFromNode::new(n)) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. 
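+    ///
+    /// Paginated alternative, sketched (node name and window length are illustrative):
+    /// ```text
+    /// node(name: "A") { neighbours { rolling(window: "1 day") {
+    ///   count
+    ///   page(limit: 10) { list { name } }
+    /// } } }
+    /// ```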
+ async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { self_clone .ws .clone() .map(|n| GqlPathFromNode::new(n)) .collect() }) - .await + .await) } } +/// A lazy sequence of per-window views of a single edge, produced by +/// `edge.rolling` / `edge.expanding`. Each entry is the edge as it exists in +/// that window. #[derive(ResolvedObject, Clone)] #[graphql(name = "EdgeWindowSet")] pub(crate) struct GqlEdgeWindowSet { @@ -242,6 +314,8 @@ impl GqlEdgeWindowSet { } #[ResolvedObjectFields] impl GqlEdgeWindowSet { + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -252,14 +326,21 @@ impl GqlEdgeWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." + )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -269,15 +350,21 @@ impl GqlEdgeWindowSet { .map(|e| e.into()) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.ws.clone().map(|e| e.into()).collect()).await + Ok(blocking_compute(move || self_clone.ws.clone().map(|e| e.into()).collect()).await) } } +/// A lazy sequence of per-window edge collections, produced by +/// `edges.rolling` / `edges.expanding`. Each entry is an `Edges` collection +/// as it exists in that window. #[derive(ResolvedObject, Clone)] #[graphql(name = "EdgesWindowSet")] pub(crate) struct GqlEdgesWindowSet { @@ -291,6 +378,8 @@ impl GqlEdgesWindowSet { } #[ResolvedObjectFields] impl GqlEdgesWindowSet { + /// Number of windows in this set. Materialising all windows is expensive for + /// large graphs — prefer `page` over `list` when iterating. async fn count(&self) -> usize { let self_clone = self.clone(); blocking_compute(move || self_clone.ws.clone().count()).await @@ -301,14 +390,21 @@ impl GqlEdgesWindowSet { /// /// For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), /// will be returned. + async fn page( &self, - limit: usize, + ctx: &Context<'_>, + #[graphql(desc = "Maximum number of items to return on this page.")] limit: usize, + #[graphql(desc = "Extra items to skip on top of `pageIndex` paging (default 0).")] offset: Option, + #[graphql( + desc = "Zero-based page number; multiplies `limit` to determine where to start (default 0)." 
+ )] page_index: Option, - ) -> Vec { + ) -> async_graphql::Result> { + check_page_limit(ctx, limit)?; let self_clone = self.clone(); - blocking_compute(move || { + Ok(blocking_compute(move || { let start = page_index.unwrap_or(0) * limit + offset.unwrap_or(0); self_clone .ws @@ -318,11 +414,17 @@ impl GqlEdgesWindowSet { .map(|e| GqlEdges::new(e)) .collect() }) - .await + .await) } - async fn list(&self) -> Vec { + /// Materialise every window as a list. Rejected by the server when bulk list + /// endpoints are disabled; use `page` for paginated access instead. + async fn list(&self, ctx: &Context<'_>) -> async_graphql::Result> { + check_list_allowed(ctx)?; let self_clone = self.clone(); - blocking_compute(move || self_clone.ws.clone().map(|e| GqlEdges::new(e)).collect()).await + Ok( + blocking_compute(move || self_clone.ws.clone().map(|e| GqlEdges::new(e)).collect()) + .await, + ) } } diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 1f23566bce..de734a771a 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -1,28 +1,45 @@ use crate::{ - auth::ContextValidation, + auth::{AuthError, ContextValidation}, + auth_policy::{AuthPolicyError, AuthorizationPolicy, GraphPermission, NamespacePermission}, data::Data, model::{ graph::{ - collection::GqlCollection, graph::GqlGraph, index::IndexSpecInput, - mutable_graph::GqlMutableGraph, namespace::Namespace, + collection::GqlCollection, + filtering::{GqlEdgeFilter, GqlNodeFilter, GraphAccessFilter}, + graph::GqlGraph, + index::IndexSpecInput, + meta_graph::MetaGraph, + mutable_graph::GqlMutableGraph, + namespace::Namespace, + namespaced_item::NamespacedItem, + node_id::GqlNodeId, vectorised_graph::GqlVectorisedGraph, }, - plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, + plugins::{ + mutation_plugin::MutationPlugin, query_plugin::QueryPlugin, PermissionsEntrypointMut, + PermissionsEntrypointQuery, + }, }, - paths::{valid_path, ExistingGraphFolder}, + paths::{ExistingGraphFolder, ValidGraphPaths, ValidWriteableGraphFolder}, rayon::blocking_compute, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; use async_graphql::Context; use dynamic_graphql::{ App, Enum, InputObject, Mutation, MutationFields, MutationRoot, OneOfInput, ResolvedObject, ResolvedObjectFields, Result, Upload, }; +use itertools::Itertools; use raphtory::{ - db::{api::view::MaterializedGraph, graph::views::deletion_graph::PersistentGraph}, - errors::{GraphError, GraphResult, InvalidPathReason}, + db::{ + api::{ + storage::storage::{Extension, PersistenceStrategy}, + view::{DynamicGraph, Filter, IntoDynamic, MaterializedGraph}, + }, + graph::views::{deletion_graph::PersistentGraph, filter::model::NodeViewFilterOps}, + }, + errors::{GraphError, GraphResult}, prelude::*, - serialise::InternalStableDecode, vectors::{ cache::CachedEmbeddingModel, storage::OpenAIEmbeddings, @@ -30,17 +47,16 @@ use raphtory::{ }, version, }; -#[cfg(feature = "storage")] -use raphtory_storage::{core_ops::CoreGraphOps, graph::graph::GraphStorage}; use std::{ error::Error, fmt::{Display, Formatter}, - io::Read, + future::Future, + pin::Pin, sync::Arc, }; -use zip::ZipArchive; +use tracing::{error, warn}; -pub(crate) mod graph; +pub mod graph; pub mod plugins; pub(crate) mod schema; pub(crate) mod sorting; @@ -129,6 +145,246 @@ pub enum GqlGraphType { Event, } +/// Checks that the caller has at least READ permission for the graph at `path`. 
+/// Returns the effective `GraphPermission` (including any stored filter) on success. +/// When denied and the caller has no INTROSPECT on the parent namespace, returns a +/// "Graph does not exist" error to avoid leaking that the graph is present. +fn require_at_least_read( + ctx: &Context<'_>, + policy: &Option>, + path: &str, +) -> async_graphql::Result { + if let Some(policy) = policy { + let role = ctx.data::>().ok().and_then(|r| r.as_deref()); + match policy.graph_permissions(ctx, path) { + Err(msg) => { + let ns = parent_namespace(path); + if policy.namespace_permissions(ctx, ns) >= NamespacePermission::Introspect { + warn!( + role = role.unwrap_or(""), + graph = path, + "Access denied by auth policy" + ); + return Err(msg.into()); + } else { + // Don't leak graph existence — act as if it doesn't exist. + return Err(async_graphql::Error::new(MissingGraph.to_string())); + } + } + Ok(perm) => { + if let Some(p) = perm.at_least_read() { + return Ok(p); + } else { + warn!( + role = role.unwrap_or(""), + graph = path, + "Introspect-only access — graph() denied; use graphMetadata() instead" + ); + return Err(async_graphql::Error::new(format!( + "Access denied: role '{}' has introspect-only access to graph '{path}' — \ + use graphMetadata(path:) for counts and timestamps, or namespace listings to browse graphs", + role.unwrap_or("") + ))); + } + } + } + } + Ok(GraphPermission::Write) +} + +/// Applies a stored data filter (serialised as `serde_json::Value` with optional `node`, `edge`, +/// `graph` keys) to a `DynamicGraph`, returning a new filtered view. +fn apply_graph_filter( + mut graph: DynamicGraph, + filter: GraphAccessFilter, +) -> Pin> + Send>> { + Box::pin(async move { + use raphtory::db::graph::views::filter::model::{ + edge_filter::CompositeEdgeFilter, node_filter::CompositeNodeFilter, DynView, + }; + + match filter { + GraphAccessFilter::Node(gql_filter) => { + let raphtory_filter = CompositeNodeFilter::try_from(gql_filter).map_err(|e| { + error!(error = %e, "node filter conversion failed"); + async_graphql::Error::new("internal error applying access filter") + })?; + graph = blocking_compute({ + let g = graph.clone(); + move || g.filter(raphtory_filter) + }) + .await + .map_err(|e| { + error!(error = %e, "node filter apply failed"); + async_graphql::Error::new("internal error applying access filter") + })? + .into_dynamic(); + } + GraphAccessFilter::Edge(gql_filter) => { + let raphtory_filter = CompositeEdgeFilter::try_from(gql_filter).map_err(|e| { + error!(error = %e, "edge filter conversion failed"); + async_graphql::Error::new("internal error applying access filter") + })?; + graph = blocking_compute({ + let g = graph.clone(); + move || g.filter(raphtory_filter) + }) + .await + .map_err(|e| { + error!(error = %e, "edge filter apply failed"); + async_graphql::Error::new("internal error applying access filter") + })? + .into_dynamic(); + } + GraphAccessFilter::Graph(gql_filter) => { + let dyn_view = DynView::try_from(gql_filter).map_err(|e| { + error!(error = %e, "graph filter conversion failed"); + async_graphql::Error::new("internal error applying access filter") + })?; + graph = blocking_compute({ + let g = graph.clone(); + move || g.filter(dyn_view) + }) + .await + .map_err(|e| { + error!(error = %e, "graph filter apply failed"); + async_graphql::Error::new("internal error applying access filter") + })? 
+ .into_dynamic(); + } + GraphAccessFilter::And(filters) => { + for f in filters { + graph = apply_graph_filter(graph, f).await?; + } + } + GraphAccessFilter::Or(filters) => { + // Group same-type sub-filters and combine with native Or; + // cross-type sub-filters are applied as independent restrictions. + let mut node_fs: Vec = vec![]; + let mut edge_fs: Vec = vec![]; + let mut rest: Vec = vec![]; + for f in filters { + match f { + GraphAccessFilter::Node(n) => node_fs.push(n), + GraphAccessFilter::Edge(e) => edge_fs.push(e), + other => rest.push(other), + } + } + if !node_fs.is_empty() { + let combined = if node_fs.len() == 1 { + node_fs.pop().unwrap() + } else { + GqlNodeFilter::Or(node_fs) + }; + graph = apply_graph_filter(graph, GraphAccessFilter::Node(combined)).await?; + } + if !edge_fs.is_empty() { + let combined = if edge_fs.len() == 1 { + edge_fs.pop().unwrap() + } else { + GqlEdgeFilter::Or(edge_fs) + }; + graph = apply_graph_filter(graph, GraphAccessFilter::Edge(combined)).await?; + } + for f in rest { + graph = apply_graph_filter(graph, f).await?; + } + } + } + + Ok(graph) + }) +} + +/// Returns the namespace portion of a graph path: everything before the last `/`. +/// For top-level graphs (no `/`), returns `""` (the root namespace). +fn parent_namespace(path: &str) -> &str { + path.rfind('/').map(|i| &path[..i]).unwrap_or("") +} + +fn write_denied(role: Option<&str>, msg: impl std::fmt::Display) -> async_graphql::Error { + match role { + Some(_) => async_graphql::Error::new(msg.to_string()), + None => AuthError::RequireWrite.into(), + } +} + +fn require_graph_write( + ctx: &Context<'_>, + policy: &Option>, + path: &str, +) -> async_graphql::Result<()> { + match policy { + None => ctx.require_jwt_write_access().map_err(Into::into), + Some(p) => { + let role = ctx.data::>().ok().and_then(|r| r.as_deref()); + p.graph_permissions(ctx, path) + .map_err(async_graphql::Error::from)? + .at_least_write() + .ok_or_else(|| { + write_denied( + role, + format!("Access denied: WRITE permission required for graph '{path}'"), + ) + })?; + Ok(()) + } + } +} + +fn require_namespace_write( + ctx: &Context<'_>, + policy: &Option>, + ns_path: &str, + new_path: &str, + operation: &str, +) -> async_graphql::Result<()> { + match policy { + None => ctx.require_jwt_write_access().map_err(Into::into), + Some(p) => { + let role = ctx.data::>().ok().and_then(|r| r.as_deref()); + if p.namespace_permissions(ctx, ns_path) < NamespacePermission::Write { + return Err(write_denied( + role, + format!("Access denied: WRITE required on namespace '{ns_path}' to {operation} graph '{new_path}'"), + )); + } + Ok(()) + } + } +} + +fn require_graph_read_src( + ctx: &Context<'_>, + policy: &Option>, + path: &str, + operation: &str, +) -> async_graphql::Result<()> { + match policy { + None => ctx.require_jwt_write_access().map_err(Into::into), + Some(p) => { + let role = ctx.data::>().ok().and_then(|r| r.as_deref()); + p.graph_permissions(ctx, path) + .map_err(async_graphql::Error::from)? + .at_least_read() + .ok_or_else(|| { + write_denied( + role, + format!( + "Access denied: READ required on source graph '{path}' to {operation}" + ), + ) + })?; + Ok(()) + } + } +} + +/// Top-level READ-only query root. Entry points for loading a graph +/// (`graph`, `graphMetadata`), browsing stored graphs (`namespaces`, +/// `namespace`, `root`), downloading a stored graph as a base64 blob +/// (`receiveGraph`), inspecting vectorised variants (`vectorisedGraph`), +/// and a few utility endpoints (`version`, `hello`, `plugins`). 
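+///
+/// A minimal round-trip, sketched; the stored-graph path and the `nodes`
+/// collection fields are assumptions following the collection conventions above:
+/// ```text
+/// { hello version graph(path: "g") { nodes { page(limit: 10) { name } } } }
+/// ```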
 #[derive(ResolvedObject)]
 #[graphql(root)]
 pub(crate) struct QueryRoot;
@@ -151,45 +407,137 @@ fn resolve(template: Option