diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7a0121b..0b8028c 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -9,6 +9,18 @@ RUN groupadd --gid $USER_GID $USERNAME \ && mkdir -p /home/$USERNAME/.vscode-server /home/$USERNAME/.pixi-env \ && chown -R $USERNAME:$USERNAME /home/$USERNAME +# gcloud SDK + kubectl for batch remote targets (K8s, Cloud Run) +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +COPY .devcontainer/scripts/on_build/install_gcloud.sh /tmp/install_gcloud.sh +COPY .devcontainer/scripts/on_build/install_kubectl.sh /tmp/install_kubectl.sh +RUN chmod +x /tmp/install_gcloud.sh /tmp/install_kubectl.sh \ + && /tmp/install_gcloud.sh \ + && /tmp/install_kubectl.sh \ + && rm /tmp/install_gcloud.sh /tmp/install_kubectl.sh + WORKDIR /workspaces/joshpy USER $USERNAME CMD ["bash"] \ No newline at end of file diff --git a/.devcontainer/scripts/on_build/install_gcloud.sh b/.devcontainer/scripts/on_build/install_gcloud.sh new file mode 100755 index 0000000..384d244 --- /dev/null +++ b/.devcontainer/scripts/on_build/install_gcloud.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# License: BSD-3-Clause +# +# SHA256 captured 2026-04-16 from this devcontainer (Codespaces, linux/amd64). +# Source: https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-526.0.0-linux-x86_64.tar.gz +# To update: download the new version, run sha256sum, and replace both +# the version and hash below. +set -e + +GCLOUD_VERSION="526.0.0" +GCLOUD_SHA256="9d647a35c87e3d6ffe3f0c7331a81b3c7cd02b0bd1cb48b83f6acb5aca75d000" + +curl -fsSL "https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-${GCLOUD_VERSION}-linux-x86_64.tar.gz" -o /tmp/gcloud.tar.gz + +if ! echo "${GCLOUD_SHA256} /tmp/gcloud.tar.gz" | sha256sum --check --status; then + echo "ERROR: SHA256 verification failed for google-cloud-cli ${GCLOUD_VERSION}." 
>&2 + echo "Expected: ${GCLOUD_SHA256}" >&2 + echo "Got: $(sha256sum /tmp/gcloud.tar.gz | cut -d' ' -f1)" >&2 + echo "The upstream archive may have changed. Re-verify and update the hash." >&2 + rm /tmp/gcloud.tar.gz + exit 1 +fi + +tar -xzf /tmp/gcloud.tar.gz -C /opt +/opt/google-cloud-sdk/install.sh --quiet --usage-reporting=false --command-completion=false --path-update=false +/opt/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin --quiet +rm /tmp/gcloud.tar.gz + +# Symlink to /usr/local/bin so gcloud and the auth plugin are on the system +# PATH for all processes — not just interactive shells. kubectl exec's +# gke-gcloud-auth-plugin as a subprocess, which in turn exec's gcloud; +# neither inherits shell PATH or Dockerfile ENV PATH reliably. +ln -sf /opt/google-cloud-sdk/bin/gcloud /usr/local/bin/gcloud +ln -sf /opt/google-cloud-sdk/bin/gsutil /usr/local/bin/gsutil +ln -sf /opt/google-cloud-sdk/bin/gke-gcloud-auth-plugin /usr/local/bin/gke-gcloud-auth-plugin diff --git a/.devcontainer/scripts/on_build/install_kubectl.sh b/.devcontainer/scripts/on_build/install_kubectl.sh new file mode 100755 index 0000000..19bea28 --- /dev/null +++ b/.devcontainer/scripts/on_build/install_kubectl.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# License: BSD-3-Clause +# +# SHA256 captured 2026-04-16 from this devcontainer (Codespaces, linux/amd64). +# Source: https://dl.k8s.io/release/v1.31.4/bin/linux/amd64/kubectl +# To update: download the new version, run sha256sum, and replace both +# the version and hash below. +set -e + +KUBECTL_VERSION="v1.31.4" +KUBECTL_SHA256="298e19e9c6c17199011404278f0ff8168a7eca4217edad9097af577023a5620f" + +curl -fsSL "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" -o /tmp/kubectl + +if ! echo "${KUBECTL_SHA256} /tmp/kubectl" | sha256sum --check --status; then + echo "ERROR: SHA256 verification failed for kubectl ${KUBECTL_VERSION}." 
>&2 + echo "Expected: ${KUBECTL_SHA256}" >&2 + echo "Got: $(sha256sum /tmp/kubectl | cut -d' ' -f1)" >&2 + echo "The upstream binary may have changed. Re-verify and update the hash." >&2 + rm /tmp/kubectl + exit 1 +fi + +install -o root -g root -m 0755 /tmp/kubectl /usr/local/bin/kubectl +rm /tmp/kubectl diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f48ea1b..e5e6407 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ permissions: contents: read jobs: - test: + unit-tests: runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -20,5 +20,42 @@ jobs: pixi-version: latest environments: dev - - name: Run tests + - name: Run unit tests run: pixi run -e dev test + + integration-tests: + runs-on: ubuntu-latest + services: + minio: + image: bitnamilegacy/minio:latest@sha256:b3d51900e846b92f7503ca6be07d2e8c56ebb6a13a60bc71b8777c716c074bcf + ports: + - 9000:9000 + env: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + MINIO_DEFAULT_BUCKETS: josh-test-bucket:public + MINIO_SCHEME: http + options: >- + --health-cmd "curl -f http://localhost:9000/minio/health/ready || curl -f http://localhost:9000/minio/health/live" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: prefix-dev/setup-pixi@a0af7a228712d6121d37aba47adf55c1332c9c2e # v0.9.4 + with: + pixi-version: latest + environments: dev + + - name: Verify MinIO is ready + run: curl -f http://localhost:9000/minio/health/ready + + - name: Download Josh JAR + run: pixi run get-jars + + - name: Run integration tests + run: pixi run -e dev test-integration diff --git a/BATCH_INTEGRATION.md b/BATCH_INTEGRATION.md new file mode 100644 index 0000000..00f5f16 --- /dev/null +++ b/BATCH_INTEGRATION.md @@ -0,0 +1,524 @@ +# Plan: Batch Remote Execution for 
joshpy + +Tracking issue: [joshpy#31](https://github.com/SchmidtDSE/joshpy/issues/31) +Umbrella PR on joshpy: [joshpy#34](https://github.com/SchmidtDSE/joshpy/pull/34) — `feat/batch-run` accumulates PRs 1–4 shipped; PR5 will land on the same branch. +Companion Java umbrella: [josh#374](https://github.com/SchmidtDSE/josh/issues/374) + +**Upstream josh dependencies (all merged):** +- [josh#409](https://github.com/SchmidtDSE/josh/pull/409) — `pollBatch` CLI (closed [josh#406](https://github.com/SchmidtDSE/josh/issues/406)) +- [josh#414](https://github.com/SchmidtDSE/josh/pull/414) — K8s batch execution system (PRs 5–9 + GKE integration) +- [josh#423](https://github.com/SchmidtDSE/josh/pull/423) — `batchRemote` flag-based UX with `.josh-staged.json` sentinel (**breaking CLI change — joshpy PR5 must adapt**) + +**Filed joshpy-facing issues (tracking):** +- [josh#416](https://github.com/SchmidtDSE/josh/issues/416) — target profile JSON uses mixed snake_case/camelCase; joshpy works around it with an explicit coercion layer +- [josh#418](https://github.com/SchmidtDSE/josh/issues/418) — Cloud Run dev deployment stuck at `running` (container scaled down before sim completes); blocks HTTP-target e2e +- [josh#425](https://github.com/SchmidtDSE/josh/issues/425) — `.jshd` → `.jsdz` XZ/LZMA2 compression (pressure-release for per-job upload/download duplication; 5–20× expected) +- [josh#426](https://github.com/SchmidtDSE/josh/issues/426) — streaming `.jshd` from S3 + inline `.josh`/`.jshc` dispatch (deferred pending #425 evaluation) + +**Related joshpy PR (separate from feat/batch-run):** +- [joshpy#35](https://github.com/SchmidtDSE/joshpy/pull/35) — `configure_s3` strips scheme from full URL endpoints (found during GKE e2e) + +## Context + +joshsim (Java) has added `batchRemote` — a parallel execution path using MinIO staging and target profiles instead of HTTP streaming. 
The full K8s batch execution system has merged to josh ([josh#414](https://github.com/SchmidtDSE/josh/pull/414)) and the CLI UX was reshaped in [josh#423](https://github.com/SchmidtDSE/josh/pull/423) to separate staging from dispatch via the `.josh-staged.json` sentinel. joshpy needs to wrap these capabilities and provide Python-level orchestration for parameter sweeps. + +**Immediate motivation:** A production run has 5 of 6 replicate CSVs sitting in MinIO (the 6th OOM'd). The run is registered in the local RunRegistry with a label. We need a way to recover those results NOW — look up the run by label, discover the `minio://` export paths, read the CSVs directly into DuckDB via S3, and load them into the registry. This drives the PR ordering: result ingestion first, then the rest of the batch infrastructure. + +**Access model (Model A):** MinIO/S3 CSVs are the source of truth. The local `.duckdb` is a materialized cache that any machine can rebuild by re-ingesting from S3. DuckDB reads CSVs directly from S3 via `httpfs` — no download, no local disk needed for the CSV data. This supports future access patterns: browser WASM reading S3, serverless aggregators attaching `.duckdb`, multi-machine access. + +**State ownership:** josh is stateless/ephemeral — it dispatches jobs and can check their status, but holds no long-running state. joshpy owns all state via RunRegistry (what was run, parameters, label, job ID). When joshpy dispatches a `--no-wait` batch job, it stores the `batch_job_id` in `job_runs.metadata`. To poll, joshpy calls `cli.poll_batch(job_id, target)` (josh#409, shipped) which knows HOW to check status for each target type (MinIO status file for HTTP, K8s Job API for K8s). joshpy doesn't know or care about the polling mechanism internals — it just gets back exit codes (0 complete / 1 error / 2 running / 100 transient) + a JSON status line. 
+ +**Key design decisions:** +- **Staging is a separate concern from dispatch (post-#423).** `batchRemote` no longer auto-stages. It takes `--minio-prefix` pointing at an already-populated MinIO location, guarded by a `.josh-staged.json` sentinel. Callers populate the prefix via `stageToMinio` (explicit) or `--stage-from-local-dir` (convenience wrapper that calls `stageToMinio` before dispatching). For sweeps, joshpy uses `cli.stage_to_minio()` then `cli.batch_remote(require_prestaged=True)`. +- **Target config is SHARED between josh and joshpy.** joshpy reads AND writes `~/.josh/targets/.json`. +- **MinIO cred resolution hierarchy** (mirrors joshsim's `HierarchyConfig`): CLI flags > profile JSON > env vars (`MINIO_ENDPOINT`, `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `MINIO_BUCKET`). Secrets don't need to live in profile JSON. +- **K8s targets have a separate `pod_minio_endpoint`** — the in-cluster MinIO endpoint pods use, which may differ from the outer `minio_endpoint` used for host-side staging. +- **Auto-pull results from MinIO after completion**, with opt-out for fire-and-forget. The same `ingest_results()` code path serves batch remote AND local OOM recovery (DRY). +- **Per-job duplication is accepted, not optimized** (decided via [josh#425](https://github.com/SchmidtDSE/josh/issues/425) / [josh#426](https://github.com/SchmidtDSE/josh/issues/426)). Each `ExpandedJob` stages its own copy of `.josh` + `.jshd` + `.jshc` to its own MinIO prefix. Sharing shared files across sweep jobs would require a PVC or a josh-side multi-prefix-merge feature; neither is available today. Compression (josh#425, expected 5–20× on geospatial `.jshd`) is the accepted pressure-release. +- **Pods do NOT share disk.** K8s indexed Jobs fan out N pods per dispatch; each pod has its own container FS and runs `stageFromMinio` independently. The "shared" in "20 replicates share external data" means shared MinIO prefix (one upload, N parallel downloads), not shared disk. 
+ +--- + +## Validated Access Pattern (2026-04-16) + +Proved out end-to-end against GKE Autopilot + GCS (S3-compatible). The client (joshpy / devcontainer) never runs simulations — it orchestrates: + +``` +Client (joshsim CLI) GCS (S3-compatible) GKE Autopilot +───────────────────── ──────────────────── ────────────── +1. Stage local files ──────────────────→ batch-jobs//inputs/ +2. Create K8s Secret (MinIO creds) ────────────────────────────────────→ josh-creds- +3. Create K8s Job ─────────────────────────────────────────────────────→ josh- +4. Poll Job API ───────────────────────────────────────────────────────→ status? + │ + Pod starts: │ + ← stageFromMinio (inputs) │ + → run simulation │ + → write results to GCS ─────────→ gke-test-results/smoke_3.csv + │ +5. Poll returns COMPLETE ←─────────────────────────────────────────────←─┘ +6. (preprocessBatch only) Download result ←── batch-jobs//outputs/output.jshd +``` + +### What the client needs + +- **The fat JAR** — `java -jar joshsim-fat.jar batchRemote ...` +- **kubectl context** — `gcloud container clusters get-credentials` (Fabric8 reads `~/.kube/config`) +- **MinIO/GCS credentials** — `MINIO_ACCESS_KEY` + `MINIO_SECRET_KEY` as env vars +- **A target profile** — `~/.josh/targets/.json` with cluster context, namespace, image, resource requests, GCS bucket +- **A .josh simulation file** — with `minio://` export paths pointing at the GCS bucket + +### Commands (pre-#423, historical) + +```bash +# Validated against pre-#423 JAR with positional arg. Retained +# here only to explain what was tested end-to-end; the CLI surface has +# since changed. See the "Authoritative CLI Surface" section below for +# the current syntax joshpy must target. +java -jar joshsim-fat.jar batchRemote sim.josh SimName --target=gke-test --replicates=5 +``` + +### Implications for joshpy + +- **K8s targets require the Java CLI** — it uses the Fabric8 K8s client to create Jobs and Secrets directly. There is no HTTP intermediary. 
joshpy shells out to `java -jar joshsim-fat.jar batchRemote ...`. +- **HTTP targets also go through the JAR.** The JAR is the single source of truth for dispatch semantics across both target types. joshpy does not POST to `/runBatch` directly — the ~3s JVM startup cost per job is negligible vs job runtimes and is easily amortized in sweeps. (Rejected: the "Python-side direct HTTP dispatch" option from earlier drafts.) +- **Env vars are the credential transport** — set `MINIO_ENDPOINT`, `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `MINIO_BUCKET` before invoking the JAR. The JAR resolves them via HierarchyConfig. +- **GKE cluster is already running** — server-side infrastructure is deployed. joshpy just needs the client-side plumbing. + +--- + +## Authoritative CLI Surface (post-#423) + +The following is verbatim from `joshsim-fat-dev.jar` (latest `dev` build, 2026-04-24) and is the source of truth joshpy code must match. The josh-side `llms-full.txt` documents this at a higher level; the JAR help is canonical. + +### `batchRemote` + +``` +joshsim batchRemote [--no-wait] [--require-prestaged] [--suppress-errors] [--suppress-info] + --minio-prefix= + [--poll-interval=] + [--replicates=] + [--stage-from-local-dir=] + --target= + [--timeout=] + +``` + +Required: +- Positional `` — simulation name (e.g. `Main`) +- `--minio-prefix=` — where inputs live (e.g. `batch-jobs/my-run/inputs/`) +- `--target=` — profile from `~/.josh/targets/.json` + +Three modes (mutually exclusive): +- `--stage-from-local-dir=` — upload local dir to `--minio-prefix`, write `.josh-staged.json` sentinel, then dispatch +- default (neither flag) — read sentinel at `--minio-prefix`: warn if absent, fail if `staging`/`error`, proceed if `complete` +- `--require-prestaged` — fail fast unless sentinel reports `complete` (recommended for the sweep use case) + +Removed vs pre-#423: positional `` (was the local dir or `.josh` file), `--custom-tag` flag. 
+ +### `pollBatch` + +``` +joshsim pollBatch [--suppress-errors] [--suppress-info] --target= +``` + +Exit codes: +- `0` — complete (success) +- `1` — error (simulation failed or dispatcher reported terminal failure) +- `2` — running / pending (still in progress) +- `100` — poll failure (transient; caller should retry) + +Stdout JSON (one line): +```json +{"status": "running", "jobId": "...", "startedAt": ""} +{"status": "complete", "jobId": "...", "completedAt": ""} +{"status": "error", "jobId": "...", "failedAt": "", "message": ""} +``` + +### `stageToMinio` + +``` +joshsim stageToMinio [--ensure-bucket-exists] [--suppress-errors] [--suppress-info] + [--config-file=] + --input-dir= --prefix= + [--minio-endpoint=...] [--minio-access-key=...] + [--minio-secret-key=...] [--minio-bucket=...] + [--minio-path=...] +``` + +Behavior: walks `` recursively, uploads every regular file to `` + relative path. Does **not** delete existing keys at the prefix (overlay/additive). Always writes `.josh-staged.json` at ``: `status=staging` → `status=complete` on success, or `status=error` with `message=` on failure. + +### `stageFromMinio` + +``` +joshsim stageFromMinio [--ensure-bucket-exists] [--suppress-errors] [--suppress-info] + [--config-file=] + --output-dir= --prefix= + [--minio-endpoint=...] [--minio-access-key=...] + [--minio-secret-key=...] [--minio-bucket=...] + [--minio-path=...] +``` + +Used both pod-side (in the batch worker entrypoint) and client-side (fallback path for `ingest_results(download=True)`). Filters out `.josh-staged.json` at any depth so pods never see the sentinel; throws if every key was filtered out (prevents silently empty workdirs). 
+ +### `preprocessBatch` (unchanged schema for now) + +``` +joshsim preprocessBatch + --target= + [--no-wait] [--poll-interval=S] [--timeout=S] + [--crs=] [--default-value=] + [--x-coord=] [--y-coord=] + [--time-dim=] [--timestep=] + [--parallel] [--amend] +``` + +Note: `preprocessBatch` has **not** been refactored to the flag-based `--minio-prefix` UX yet — it still takes a positional `` that gets auto-staged by the JAR. If josh reshapes this in a follow-up, joshpy's `PreprocessBatchConfig` will need the same refactor as `BatchRemoteConfig`. + +### `.josh-staged.json` sentinel + +Written by `stageToMinio` at the root of its `--prefix`. JSON shape (from josh#423): + +```json +{"status": "staging", "startedAt": ""} +{"status": "complete", "completedAt": ""} +{"status": "error", "failedAt": "", "message": ""} +``` + +Readers: +- `batchRemote` (default mode) — proceeds if `complete`, warns if absent, fails on `staging`/`error` +- `batchRemote --require-prestaged` — fails hard unless `complete` +- pod entrypoint — filtered out of `stageFromMinio` so pods never see it + +--- + +## PR Plan + +``` +PR1 ✅ → PR2 ✅ → PR3 ✅ → PR4 ✅ → PR5 (CLI refactor for #423 + per-job workdir) → PR6 (polish) +``` + +PRs 1–4 are merged to `feat/batch-run` (joshpy#34). PR3 shipped `BatchRemoteConfig` / `cli.batch_remote()` against the pre-#423 CLI; PR5 rewrites both against the new surface. PR4 shipped `to_batch_remote_config()` which needs the same refactor. No downstream consumers, so the break is contained. + +### Regression gates (every PR) +- `pixi run test` passes (current: 935 unit tests; integration tests excluded) +- `pixi run test-integration` passes (17 MinIO integration tests, CI only) +- Existing `runRemote` path completely untouched + +--- + +### PR 1: Result Recovery — S3-native `ingest_results()` ✅ DONE + +**Status:** Merged via [joshpy#32](https://github.com/SchmidtDSE/joshpy/pull/32). All code shipped, unit tests passing, MinIO integration tests added. 
+ +#### What shipped + +| Component | File | Description | +|-----------|------|-------------| +| `configure_s3()` | `joshpy/registry.py` | DuckDB httpfs + S3 credential setup (parameterized `use_ssl`) | +| `CellDataLoader.load_csv()` | `joshpy/cell_data.py` | Accepts `s3://` URL strings alongside local `Path` | +| `ingest_results()` | `joshpy/sweep.py` | Full recovery by label: metadata → exports → S3 read → load | +| `_resolve_ingest_metadata()` | `joshpy/sweep.py` | Helper: label/hash → run metadata | +| `_get_josh_source()` | `joshpy/sweep.py` | Helper: josh file from disk or stored content | +| `_configure_minio_access()` | `joshpy/sweep.py` | Helper: S3 direct read or stageFromMinio download | +| `_load_ingest_replicates()` | `joshpy/sweep.py` | Helper: per-replicate CSV loading with graceful skip | +| `StageFromMinioConfig` | `joshpy/cli.py` | Config for `stageFromMinio` CLI command | +| `JoshCLI.stage_from_minio()` | `joshpy/cli.py` | `download=True` fallback path | +| `SweepManager.ingest()` | `joshpy/sweep.py` | Convenience wrapper for `ingest_results()` | + +#### CI infrastructure shipped alongside PR 1 + +| Component | File | Description | +|-----------|------|-------------| +| Unit test workflow | `.github/workflows/test.yml` | `unit-tests` job: 867 tests via pixi | +| Integration test workflow | `.github/workflows/test.yml` | `integration-tests` job: MinIO service container + JAR | +| MinIO test simulation | `tests/fixtures/minio_export.josh` | Minimal .josh with `minio://` exports | +| Shared fixtures | `tests/conftest.py` | `minio_conn`, `minio_registry`, `seed_csv`, `josh_cli`, etc. 
| +| Integration tests | `tests/test_minio_integration.py` | 17 tests across 6 escalating levels | +| Pytest marker | `pyproject.toml` | `integration` marker registered | +| Pixi tasks | `pixi.toml` | `test` (unit only), `test-integration` (MinIO only) | + +#### Integration test levels + +| Level | Class | Tests | What it proves | +|-------|-------|-------|---------------| +| 1 | `TestMinioWrite` | 2 | DuckDB httpfs writes/reads CSVs to MinIO | +| 2 | `TestMinioJarWrite` | 3 | Real Josh JAR exports to MinIO, Python reads back | +| 3 | `TestMinioCellDataLoader` | 4 | `CellDataLoader.load_csv("s3://...")` ingests into registry | +| 4 | `TestMinioIngestResults` | 2 | Full `ingest_results()` by label, data queryable | +| 5 | `TestMinioPartialRecovery` | 3 | Missing replicates skipped gracefully (2/3, 0/3, 1/10) | +| Edge | `TestMinioEdgeCases` | 3 | Bad creds, missing bucket, run_hash namespace isolation | + +--- + +### PR 2: Target Profile System ✅ DONE + +**Status:** Merged on `feat/batch-run` (commit `a6f82d7`). No changes needed for #423 — the target profile JSON schema is unchanged. 
+ +#### What shipped +- `joshpy/targets.py` — `TargetProfile`, `HttpTargetConfig`, `KubernetesTargetConfig`, `ResolvedMinioCreds` dataclasses; `save_target`/`load_target`/`list_targets`/`delete_target`; `resolve_minio_creds()` (profile > env vars hierarchy) +- `joshpy/__init__.py` — all target symbols exported +- `tests/test_targets.py` — 32 tests (dataclass construction, serialization round-trip, filesystem CRUD, credential resolution) +- `.devcontainer/Dockerfile` + `install_gcloud.sh`/`install_kubectl.sh` — SHA256-pinned gcloud SDK 526.0.0 + kubectl v1.31.4 baked into the image +- Explicit Python↔JSON key mapping for 4 fields where conventions differ: `target_type` ↔ `type`, `api_key` ↔ `apiKey`, `timeout_seconds` ↔ `timeoutSeconds`, `ttl_seconds_after_finished` ↔ `ttlSecondsAfterFinished` + +#### Known gap +- [josh#416](https://github.com/SchmidtDSE/josh/issues/416) — target profile JSON mixes snake_case and camelCase in the same file (`minio_endpoint` vs `apiKey`). joshpy handles it with the explicit coercion layer above. Filed for future consistency. + +--- + +### PR 3: CLI Wrappers — `batch_remote()`, `preprocess_batch()`, `stage_to_minio()` ✅ DONE (needs refactor in PR5) + +**Status:** Merged on `feat/batch-run` (commit `346b9b9`). Built against the **pre-#423** CLI. **Must be refactored in PR5** to match the new flag-based `batchRemote` surface. 
+ +#### What shipped (pre-#423 shape — partially obsolete) + +| Dataclass | Status | Notes | +|-----------|--------|-------| +| `StageToMinioConfig` | ✅ additive fields TBD in PR5 | Missing `ensure_bucket_exists`, `config_file`, `minio_path` (new in josh dev) | +| `StageFromMinioConfig` | ✅ (shipped in PR1) additive fields TBD in PR5 | Same new flags available | +| `BatchRemoteConfig` | ⚠️ **breaking refactor in PR5** | Currently has positional `script_or_dir` + `custom_tags`, both removed by #423 | +| `PreprocessBatchConfig` | ✅ barebones; expandable | Still uses positional args (josh hasn't refactored `preprocessBatch` yet) | + +#### What PR5 must do to this module +- Rewrite `BatchRemoteConfig` to require `minio_prefix`, add `stage_from_local_dir` and `require_prestaged` (mutex), drop `script_or_dir` and `custom_tags` +- Rewrite `cli.batch_remote()` command building against the new flag set +- Add optional `ensure_bucket_exists`, `config_file`, `minio_path` to `StageToMinioConfig` / `StageFromMinioConfig` +- Expand `PreprocessBatchConfig` with newly-exposed flags (`--crs`, `--x-coord`, `--y-coord`, `--time-dim`, `--timestep`, `--default-value`, `--parallel`, `--amend`) + +#### Tests +- `tests/test_cli.py` — `TestStageToMinio`, `TestStageToMinioConfig`, `TestBatchRemote`, `TestBatchRemoteConfig`, `TestPreprocessBatch`, `TestPreprocessBatchConfig`, `TestPollBatch`, `TestPollBatchConfig` (17 tests; will need updates in PR5 to match new CLI shape) + +--- + +### PR 4: Sweep Integration — `run_sweep()` + `SweepManager` + adaptive ✅ DONE (needs refactor in PR5) + +**Status:** Merged on `feat/batch-run` (commit `ff661a3`). Built against the **pre-#423** CLI. The dispatch wiring in `run_sweep()` and `run_adaptive_sweep()` is sound, but the `to_batch_remote_config()` helper and the exact `cli.batch_remote()` call sites must change in PR5. + +#### What shipped +- `PollBatchConfig` + `JoshCLI.poll_batch()` — wraps `pollBatch --target=`. 
**Still correct** post-#423 (pollBatch CLI didn't change). +- `to_batch_remote_config(job, target, *, no_wait, poll_interval, timeout)` — converts `ExpandedJob` → `BatchRemoteConfig`. **Will be rewritten in PR5** against the new config shape. +- `run_sweep()` new params: `batch_remote`, `target`, `batch_no_wait`, `poll_interval`, `batch_timeout`, `auto_ingest`. Mutually-exclusive with `remote`. +- Two dispatch modes wired: + - **Blocking** (default): `batch_remote(no_wait=False)`, JAR polls internally, `ingest_results()` loads CSVs from S3 + - **Async** (`batch_no_wait=True`): `batch_remote(no_wait=True)` per job, parse `jobId` from stdout JSON, store in registry metadata, then poll loop using `cli.poll_batch()` until all complete +- `SweepManager.run()` / `run_adaptive_sweep()` thread the new params through + +#### What PR5 must do to this module +- Update the `to_batch_remote_config()` call sites in `jobs.py` and `strategies.py` to: + 1. Assemble per-job workdir (new helper) + 2. Call `cli.stage_to_minio(input_dir=workdir, prefix=per_job_prefix)` + 3. Build `BatchRemoteConfig(minio_prefix=per_job_prefix, require_prestaged=True, ...)` (new schema) + 4. Call `cli.batch_remote(br_config)` +- The async polling loop (`_async_dispatched` / `cli.poll_batch`) stays as-is. + +#### Tests shipped +- `tests/test_jobs.py::TestToBatchRemoteConfig` (3 tests) — will be updated for new shape +- `tests/test_jobs.py::TestRunSweepBatchRemote` (4 tests) — covers validation, blocking dispatch, async JSON parsing; the dispatch-path tests will need reworking against `stage_to_minio` + `batch_remote(--require-prestaged)` flow + +--- + +### PR 5: Refactor to post-#423 CLI + per-job workdir assembly + +This is the consolidated "catch up to josh#423 and fix the e2e-surfaced staging gaps" PR. Two real problems it solves: + +1. **Adapt to josh#423's breaking CLI change.** `batchRemote` no longer auto-stages; it expects `--minio-prefix` pointing at a sentinel-protected MinIO location. 
joshpy's `BatchRemoteConfig` / `cli.batch_remote()` / `to_batch_remote_config()` must be rewritten. +2. **Directory contamination fix (found during GKE e2e).** When the pre-#423 JAR was given a `.josh` file, it staged the entire containing directory. Sibling `.josh` files (e.g. test fixtures) got swept in. Per-job workdir assembly solves this cleanly. + +**Explicitly NOT in scope** (discussed and deferred per [josh#425](https://github.com/SchmidtDSE/josh/issues/425) / [josh#426](https://github.com/SchmidtDSE/josh/issues/426)): +- Cross-job sharing of `.josh`/`.jshd` via a shared MinIO prefix. josh does not support multi-prefix merge today; joshpy-side server-side copy would re-couple infra logic. Accepted duplication is the design; compression is the pressure-release. +- Python-side HTTP POST to `/runBatch`. JAR is the single dispatcher. + +#### New file: `joshpy/batch_orchestrator.py` + +```python +def assemble_batch_workdir(job: ExpandedJob, workdir: Path) -> Path: + """Create a per-ExpandedJob staging directory. + + Layout:: + + workdir// + sim.josh # symlink to job.source_path + config.jshc # unique rendered config for this job + .jshd # symlinks for each entry in job.file_mappings + + Returns the path that should be passed to ``cli.stage_to_minio(input_dir=...)``. + Uses symlinks (not copies) to avoid disk duplication for large .jshd files. + """ +``` + +(No `BatchOrchestrator` class. Pure function. joshpy doesn't own staging state — josh does, via the sentinel.) 
+ +#### CLI-layer refactor (`joshpy/cli.py`) + +```python +@dataclass(frozen=True) +class BatchRemoteConfig: + simulation: str + target: str + minio_prefix: str # REQUIRED (new) + replicates: int = 1 + no_wait: bool = False + poll_interval: int | None = None + timeout: int | None = None + stage_from_local_dir: Path | None = None # mutex with require_prestaged + require_prestaged: bool = False # recommended for sweeps + # removed: script_or_dir, custom_tags + + def __post_init__(self) -> None: + if self.stage_from_local_dir and self.require_prestaged: + raise ValueError( + "stage_from_local_dir and require_prestaged are mutually exclusive" + ) +``` + +`StageToMinioConfig` / `StageFromMinioConfig`: add optional `ensure_bucket_exists: bool = False`, `config_file: Path | None = None`, `minio_path: str | None = None`. + +`PreprocessBatchConfig`: expand with `crs`, `x_coord`, `y_coord`, `time_dim`, `timestep`, `default_value`, `parallel`, `amend`. + +#### Sweep-loop rewire (`joshpy/jobs.py` + `joshpy/strategies.py`) + +```python +# New shape of to_batch_remote_config: takes a pre-staged prefix. +def to_batch_remote_config( + job: ExpandedJob, + target: str, + minio_prefix: str, + *, + no_wait: bool = False, + poll_interval: int | None = None, + timeout: int | None = None, + require_prestaged: bool = True, +) -> BatchRemoteConfig: ... + +# run_sweep batch-remote path becomes: +workdir = tempfile.mkdtemp(prefix=f"joshpy-sweep-{session_id}-") +for job in job_set: + job_dir = assemble_batch_workdir(job, Path(workdir)) + per_job_prefix = f"sweeps/{session_id}/jobs/{job.run_hash}/" + cli.stage_to_minio( + StageToMinioConfig(input_dir=job_dir, prefix=per_job_prefix) + ) + cli.batch_remote( + to_batch_remote_config(job, target, per_job_prefix, + require_prestaged=True, + no_wait=batch_no_wait, + poll_interval=poll_interval, + timeout=batch_timeout) + ) +``` + +The async `_async_dispatched` / `cli.poll_batch` loop from PR4 stays exactly as-is. 
+ +#### Tests +- `tests/test_cli.py` — update `TestBatchRemote*` for the new flag set; add `TestBatchRemoteConfig.test_mutex` for the mutex validation; add tests for new optional flags on stage configs +- `tests/test_batch_orchestrator.py` (NEW) — `assemble_batch_workdir` covers symlinks vs copies, run_hash subdir naming, file_mappings fan-out, `.jshc` content write +- `tests/test_jobs.py::TestToBatchRemoteConfig` — rewrite for new signature +- `tests/test_jobs.py::TestRunSweepBatchRemote` — mock both `stage_to_minio` and `batch_remote`; assert ordering (stage first, dispatch second); assert `require_prestaged=True` on the batch call + +#### E2E against GKE (2026-04-23+ JAR required) +- Dispatch a single ExpandedJob with 5 replicates through the full joshpy sweep loop +- Verify GCS prefix contains `.josh-staged.json` with `status=complete` after `stage_to_minio` +- Verify `batch_remote(--require-prestaged)` proceeds and K8s indexed Job fans out 5 pods +- Verify `ingest_results()` pulls all 5 CSVs from the per-job prefix's output location + +**Risk: LOW.** CLI refactor is mechanical. Workdir assembly is pure filesystem. No infra/dispatch logic moves into joshpy. HTTP-target e2e is blocked on [josh#418](https://github.com/SchmidtDSE/josh/issues/418) but K8s path is unaffected. 
+ +--- + +### PR 6: Polish — Builder, Docs, Bottle Metadata + +- `SweepManagerBuilder.with_batch_remote(target, ...)` convenience method +- MinIO metadata in bottle manifest +- Update `llms-full.txt` with all new APIs + +**Risk: LOW** + +--- + +## Files Modified (all PRs) + +| File | PRs | Changes | +|------|-----|---------| +| `joshpy/cli.py` | ✅1, ✅3, ✅4, **5** | `StageFromMinioConfig` + `stage_from_minio()` (✅PR 1); `BatchRemoteConfig` + `batch_remote()`, `PreprocessBatchConfig` + `preprocess_batch()`, `StageToMinioConfig` + `stage_to_minio()` (✅PR 3); `PollBatchConfig` + `poll_batch()` (✅PR 4); **refactor `BatchRemoteConfig`/`cli.batch_remote()` for josh#423 new flag surface; add `ensure_bucket_exists`/`config_file`/`minio_path` to stage configs; expand `PreprocessBatchConfig` (PR 5)** | +| `joshpy/cell_data.py` | ✅1 | `load_csv()` accepts `str` (S3 URL) in addition to `Path` | +| `joshpy/registry.py` | ✅1 | `configure_s3()` utility for DuckDB httpfs + S3 credential setup (separate PR [joshpy#35](https://github.com/SchmidtDSE/joshpy/pull/35) adds scheme-stripping on top) | +| `joshpy/sweep.py` | ✅1, ✅4, 6 | `ingest_results()` + helpers + `SweepManager.ingest()` (✅PR 1); `batch_remote`/`target`/`batch_no_wait`/`poll_interval`/`batch_timeout`/`auto_ingest` on `.run()` (✅PR 4); builder (PR 6) | +| **NEW** `joshpy/targets.py` | ✅2 | Target profile system (read/write/list/creds hierarchy) | +| `joshpy/jobs.py` | ✅4, **5** | `to_batch_remote_config()` + extend `run_sweep()` (✅PR 4); **rewrite `to_batch_remote_config()` for new CLI; wire `assemble_batch_workdir` + `stage_to_minio` before dispatch (PR 5)** | +| `joshpy/strategies.py` | ✅4, **5** | Extend `run_adaptive_sweep()` (✅PR 4); same dispatch-path rewrite (PR 5) | +| **NEW** `joshpy/batch_orchestrator.py` | **5** | `assemble_batch_workdir()` pure-function helper | +| `joshpy/bottle.py` | 6 | MinIO metadata in manifest | +| `joshpy/__init__.py` | ✅1-4, **5** | Export new symbols; remove/add as CLI shape changes 
(PR 5) | +| **NEW** `.devcontainer/scripts/on_build/install_gcloud.sh` + `install_kubectl.sh` | ✅2 | SHA256-pinned system tool installs | +| `.devcontainer/Dockerfile` | ✅2 | Install curl + run both gcloud/kubectl scripts | +| `tests/test_cli.py` | ✅1, ✅3, **5** | `StageFromMinio` tests (✅PR 1); remaining CLI tests (✅PR 3); **update `TestBatchRemote*` for new flags, add mutex test (PR 5)** | +| `tests/test_sweep.py` | ✅1, ✅4 | `ingest_results` tests (✅PR 1); SweepManager batch_remote tests (✅PR 4); `TestConfigureS3` scheme-handling tests (joshpy#35) | +| `tests/conftest.py` | ✅1 | Shared fixtures, marker registration | +| `tests/test_minio_integration.py` | ✅1 | 17 MinIO integration tests (5 levels + edge cases) | +| **NEW** `tests/test_targets.py` | ✅2 | Target profile tests (32 cases) | +| `tests/test_jobs.py` | ✅4, **5** | Workdir + converter + sweep tests (✅PR 4); rewrite for new `to_batch_remote_config` shape and `stage_to_minio → batch_remote` ordering (PR 5) | +| `tests/test_strategies.py` | 4, **5** | Adaptive batch remote tests; adapt for new dispatch path (PR 5) | +| **NEW** `tests/test_batch_orchestrator.py` | **5** | `assemble_batch_workdir` tests (symlinks, run_hash dirs, .jshc content) | + +--- + +## Verification + +PR 1 end-to-end (immediate need — ✅ validated): +```bash +# In josh-models repo: +pixi run recover my-label +# -> Looks up "my-label" in registry +# -> Discovers minio:// export paths via inspect-exports +# -> Configures DuckDB httpfs with S3 creds from env vars +# -> Reads CSVs directly from S3 into DuckDB (no download) +# -> Loads into registry, skipping missing replicate from OOM +# -> Prints: "Done: 1234567 rows loaded for 'my-label'" +``` + +CI verification (✅ in place): +```bash +# Unit tests (no MinIO needed): +pixi run test # 935 passed, 17 deselected (as of 2026-04-23) + +# Integration tests (MinIO service container + JAR): +pixi run test-integration # 17 tests across 5 levels + edge cases + +# Local integration test: +docker run 
-d --name minio-test -p 9000:9000 \ + -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin \ + -e MINIO_DEFAULT_BUCKETS=josh-test-bucket:public \ + bitnamilegacy/minio:latest +pixi run get-jars +pixi run -e dev test-integration +docker rm -f minio-test +``` + +Full batch remote integration (target: PR 5 once refactor lands): +```python +# Sweep with batch remote on GKE. joshpy stages each ExpandedJob's +# workdir to its own MinIO prefix, then dispatches with +# require_prestaged=True. +manager = SweepManager.from_config(config, registry="exp.duckdb") +results = manager.run(batch_remote=True, target="gke-test") +manager.load_results() + +# Fire-and-forget -> recover later (stores batch_job_id in registry metadata). +results = manager.run(batch_remote=True, target="gke-test", batch_no_wait=True) +# ... later, same machine or another: +manager.ingest() + +# Or download locally: +manager.ingest(download=True, output_dir=Path("./local_results")) +``` + +E2E status (2026-04-24): +- **K8s target (gke-test):** ✅ fully working end-to-end against `dse-nps` GKE Autopilot with the pre-#423 JAR. Dispatch → poll → ingest all validated. Re-validation needed once PR5 lands and we switch to the post-#423 JAR (`jar/joshsim-fat-dev.jar`). +- **HTTP target (cloudrun-dev):** ❌ blocked on [josh#418](https://github.com/SchmidtDSE/josh/issues/418). Dispatch succeeds, `status=running` is written to GCS, then Cloud Run scales the container down before the simulation completes. No output CSV produced. Not a joshpy issue. + +--- + +## Summary: why PR5 is the main remaining work + +josh#423 arrived after PRs 3–4 were designed. The new flag-based `batchRemote` is a strictly better architecture — staging and dispatch are cleanly separated, the `.josh-staged.json` sentinel gives us an explicit readiness contract, and the multi-dispatch workflow (one stage, many dispatches) is now first-class. But it's a breaking CLI change. 
joshpy PR5 is the "catch up + fix the workdir-contamination bug + wire `stage_to_minio` + `batch_remote(--require-prestaged)` through the sweep loop" consolidation. After PR5, the plan completes with PR6 (polish). diff --git a/joshpy/__init__.py b/joshpy/__init__.py index 50270ae..4a20381 100644 --- a/joshpy/__init__.py +++ b/joshpy/__init__.py @@ -37,6 +37,11 @@ InspectExportsConfig, ExportFileInfo, ExportPaths, + StageFromMinioConfig, + StageToMinioConfig, + BatchRemoteConfig, + PreprocessBatchConfig, + PollBatchConfig, ) # JFR diagnostics (always available, no external deps) @@ -72,6 +77,19 @@ format_message, ) +# Target profile system (always available, no external deps) +from joshpy.targets import ( + TargetProfile, + HttpTargetConfig, + KubernetesTargetConfig, + ResolvedMinioCreds, + load_target, + save_target, + list_targets, + delete_target, + resolve_minio_creds, +) + # Optional jobs module (requires jinja2 and pyyaml) try: from joshpy.jobs import ( @@ -87,6 +105,7 @@ AdaptiveSweepResult, to_run_config, to_run_remote_config, + to_batch_remote_config, run_sweep, discover_jshd_files, ) @@ -119,6 +138,7 @@ RunInfo, SessionSummary, DataSummary, + configure_s3, ) from joshpy.cell_data import ( CellDataLoader, @@ -136,6 +156,7 @@ SweepManagerBuilder, recover_sweep_results, load_job_results, + ingest_results, LoadConfig, ResultLoadError, ) @@ -182,6 +203,11 @@ "InspectExportsConfig", "ExportFileInfo", "ExportPaths", + "StageFromMinioConfig", + "StageToMinioConfig", + "BatchRemoteConfig", + "PreprocessBatchConfig", + "PollBatchConfig", # JFR diagnostics "ResourceProfile", "CpuProfile", @@ -206,6 +232,16 @@ "load_debug_file", "load_debug_from_script", "format_message", + # Target profiles + "TargetProfile", + "HttpTargetConfig", + "KubernetesTargetConfig", + "ResolvedMinioCreds", + "load_target", + "save_target", + "list_targets", + "delete_target", + "resolve_minio_creds", # Jobs (optional) "ConfigSweepParameter", "FileSweepParameter", @@ -219,6 +255,7 @@ 
"AdaptiveSweepResult", "to_run_config", "to_run_remote_config", + "to_batch_remote_config", "run_sweep", "discover_jshd_files", "GridSpec", @@ -243,6 +280,7 @@ "RunInfo", "SessionSummary", "DataSummary", + "configure_s3", "CellDataLoader", "DiagnosticQueries", "SimulationDiagnostics", @@ -252,6 +290,7 @@ "SweepManagerBuilder", "recover_sweep_results", "load_job_results", + "ingest_results", "LoadConfig", "ResultLoadError", "HAS_SWEEP", diff --git a/joshpy/batch_orchestrator.py b/joshpy/batch_orchestrator.py new file mode 100644 index 0000000..4322647 --- /dev/null +++ b/joshpy/batch_orchestrator.py @@ -0,0 +1,63 @@ +"""Per-job staging directory assembly for batch remote execution. + +The sweep loop assembles one directory per ``ExpandedJob`` so each job stages +only its own ``.josh``/``.jshc``/``.jshd`` files into its MinIO prefix. This +avoids the directory-contamination bug where sibling fixture files were swept +in when the pre-#423 JAR staged a whole parent directory. +""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from joshpy.jobs import ExpandedJob + + +def assemble_batch_workdir(job: ExpandedJob, workdir: Path) -> Path: + """Create a per-ExpandedJob staging directory. + + Layout:: + + workdir// + sim.josh # symlink to job.source_path + config.jshc # unique rendered config for this job (written) + .jshd # symlink per entry in job.file_mappings + + Uses symlinks (not copies) to avoid disk duplication of large .jshd files; + ``cli.stage_to_minio`` uploads via content reads and follows symlinks. + Idempotent: re-running against the same workdir replaces existing entries. + + Args: + job: The expanded job. ``job.source_path`` must be set. + workdir: Parent directory to create the per-job subdir inside. + + Returns: + Path to the per-job staging directory (``workdir//``). + + Raises: + ValueError: If ``job.source_path`` is None. 
+ """ + if job.source_path is None: + raise ValueError("ExpandedJob.source_path is required for batch remote") + + target = workdir / job.run_hash + target.mkdir(parents=True, exist_ok=True) + + sim_link = target / "sim.josh" + if sim_link.exists() or sim_link.is_symlink(): + sim_link.unlink() + os.symlink(job.source_path.resolve(), sim_link) + + (target / "config.jshc").write_text(job.config_content) + + for name, path in job.file_mappings.items(): + dest_name = name if name.endswith(".jshd") else f"{name}.jshd" + dest = target / dest_name + if dest.exists() or dest.is_symlink(): + dest.unlink() + os.symlink(path.resolve(), dest) + + return target diff --git a/joshpy/cell_data.py b/joshpy/cell_data.py index 7b70b59..ee02ba8 100644 --- a/joshpy/cell_data.py +++ b/joshpy/cell_data.py @@ -148,7 +148,7 @@ def __init__(self, registry: Any): def load_csv( self, - csv_path: Path, + csv_path: "Path | str", run_id: str, run_hash: str, entity_type: str = "patch", @@ -166,10 +166,12 @@ def load_csv( quoted identifiers (e.g., 'avg.height' stays as "avg.height"), requiring double quotes when referenced with direct calls to DuckDB. - Uses DuckDB's native CSV reader for optimal performance. + Uses DuckDB's native CSV reader for optimal performance. Accepts both + local ``Path`` objects and ``s3://`` URL strings (requires httpfs to be + loaded on the connection -- see ``configure_s3()``). Args: - csv_path: Path to the CSV file. + csv_path: Path to the CSV file, or an ``s3://`` URL string. run_id: The run ID this data belongs to. run_hash: Run hash for this run. entity_type: Type of entity being exported (default: "patch"). @@ -178,14 +180,18 @@ def load_csv( Number of rows loaded. Raises: - FileNotFoundError: If csv_path doesn't exist. + FileNotFoundError: If csv_path is a local path that doesn't exist. ValueError: If CSV is missing required columns or type mismatch. 
""" - if not csv_path.exists(): - raise FileNotFoundError(f"CSV not found: {csv_path}") + if isinstance(csv_path, str) and csv_path.startswith("s3://"): + csv_path_str = csv_path + else: + csv_path = Path(csv_path) + if not csv_path.exists(): + raise FileNotFoundError(f"CSV not found: {csv_path}") + csv_path_str = str(csv_path.resolve()) conn = self.registry.conn - csv_path_str = str(csv_path.resolve()) # Read CSV header to identify columns using DuckDB header_result = conn.execute(f"SELECT * FROM read_csv_auto('{csv_path_str}') LIMIT 0") diff --git a/joshpy/cli.py b/joshpy/cli.py index 2debb12..fb97ccc 100644 --- a/joshpy/cli.py +++ b/joshpy/cli.py @@ -398,6 +398,175 @@ class InspectJshdConfig: y: int +@dataclass(frozen=True) +class StageFromMinioConfig: + """Arguments for 'java -jar joshsim.jar stageFromMinio' command. + + Downloads all objects under a MinIO prefix to a local directory. + MinIO credentials are optional -- joshsim falls back to environment + variables (MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, + MINIO_BUCKET) via its HierarchyConfig. + + Attributes: + output_dir: Local directory to download files into. + prefix: MinIO object prefix to download from. + minio_endpoint: MinIO endpoint URL (optional). + minio_access_key: MinIO access key (optional). + minio_secret_key: MinIO secret key (optional). + minio_bucket: MinIO bucket name (optional). + config_file: Path to JSON configuration file (optional). + ensure_bucket_exists: Ensure the bucket exists before downloading. + minio_path: Base object name/path within bucket (optional). + """ + + output_dir: Path + prefix: str + minio_endpoint: str | None = None + minio_access_key: str | None = None + minio_secret_key: str | None = None + minio_bucket: str | None = None + config_file: Path | None = None + ensure_bucket_exists: bool = False + minio_path: str | None = None + + +@dataclass(frozen=True) +class StageToMinioConfig: + """Arguments for 'java -jar joshsim.jar stageToMinio' command. 
@dataclass(frozen=True)
class StageToMinioConfig:
    """Arguments for 'java -jar joshsim.jar stageToMinio' command.

    Mirrors the JAR's flag surface for pushing a local directory up to a
    MinIO prefix. All credential fields may be left unset; joshsim's
    HierarchyConfig then falls back to the MINIO_* environment variables.

    Attributes:
        input_dir: Local directory to upload.
        prefix: MinIO object prefix to upload to.
        minio_endpoint: MinIO endpoint URL (optional).
        minio_access_key: MinIO access key (optional).
        minio_secret_key: MinIO secret key (optional).
        minio_bucket: MinIO bucket name (optional).
        config_file: Path to JSON configuration file (optional).
        ensure_bucket_exists: Ensure the bucket exists before uploading.
        minio_path: Base object name/path within bucket (optional).
    """

    input_dir: Path
    prefix: str
    minio_endpoint: str | None = None
    minio_access_key: str | None = None
    minio_secret_key: str | None = None
    minio_bucket: str | None = None
    config_file: Path | None = None
    ensure_bucket_exists: bool = False
    minio_path: str | None = None
@dataclass(frozen=True)
class BatchRemoteConfig:
    """Arguments for 'java -jar joshsim.jar batchRemote' command (post-josh#423).

    Dispatch is decoupled from staging: inputs must already sit in MinIO
    under ``minio_prefix`` (marked ready by a ``.josh-staged.json``
    sentinel), or ``stage_from_local_dir`` may name a local directory the
    JAR uploads first as a convenience. One option asks the JAR to stage
    while the other asserts staging already happened, so the pair is
    rejected at construction time.

    Attributes:
        simulation: Name of simulation to run.
        target: Target profile name (required).
        minio_prefix: MinIO object prefix where inputs live (e.g.
            ``batch-jobs/my-run/inputs/``).
        replicates: Number of replicates (default: 1).
        no_wait: If True, dispatch and exit without polling (default: False).
        poll_interval: Polling interval in seconds (optional).
        timeout: Job timeout in seconds (optional).
        stage_from_local_dir: Local directory to upload to ``minio_prefix``
            before dispatch. Mutex with ``require_prestaged``.
        require_prestaged: Fail fast unless the ``.josh-staged.json`` at
            ``minio_prefix`` reports ``complete``. Recommended for sweeps.
            Mutex with ``stage_from_local_dir``.
    """

    simulation: str
    target: str
    minio_prefix: str
    replicates: int = 1
    no_wait: bool = False
    poll_interval: int | None = None
    timeout: int | None = None
    stage_from_local_dir: Path | None = None
    require_prestaged: bool = False

    def __post_init__(self) -> None:
        # Requesting both staging modes at once is always a caller bug.
        wants_both = self.require_prestaged and self.stage_from_local_dir is not None
        if wants_both:
            raise ValueError(
                "stage_from_local_dir and require_prestaged are mutually exclusive"
            )
def stage_from_minio(
    self,
    config: StageFromMinioConfig,
    timeout: float | None = None,
) -> CLIResult:
    """Download files from MinIO to a local directory.

    Assembles the ``stageFromMinio`` argument vector from ``config`` and
    delegates to ``_execute``. Credential flags are emitted only when set,
    leaving the JAR free to fall back to its environment-variable hierarchy.

    Args:
        config: Stage-from-MinIO configuration.
        timeout: Timeout in seconds.

    Returns:
        CLIResult with execution details.
    """
    args = [
        "stageFromMinio",
        "--output-dir", str(config.output_dir.resolve()),
        "--prefix", config.prefix,
    ]

    # Optional credential overrides use the two-token "--flag value" form.
    credential_flags = (
        ("--minio-endpoint", config.minio_endpoint),
        ("--minio-access-key", config.minio_access_key),
        ("--minio-secret-key", config.minio_secret_key),
        ("--minio-bucket", config.minio_bucket),
    )
    for flag, value in credential_flags:
        if value:
            args += [flag, value]

    if config.config_file is not None:
        args.append(f"--config-file={config.config_file.resolve()}")
    if config.ensure_bucket_exists:
        args.append("--ensure-bucket-exists")
    if config.minio_path is not None:
        args.append(f"--minio-path={config.minio_path}")

    return self._execute(args, timeout=timeout)
+ """ + args = [ + "stageToMinio", + "--input-dir", str(config.input_dir.resolve()), + "--prefix", config.prefix, + ] + + if config.minio_endpoint: + args.extend(["--minio-endpoint", config.minio_endpoint]) + if config.minio_access_key: + args.extend(["--minio-access-key", config.minio_access_key]) + if config.minio_secret_key: + args.extend(["--minio-secret-key", config.minio_secret_key]) + if config.minio_bucket: + args.extend(["--minio-bucket", config.minio_bucket]) + if config.config_file is not None: + args.append(f"--config-file={config.config_file.resolve()}") + if config.ensure_bucket_exists: + args.append("--ensure-bucket-exists") + if config.minio_path is not None: + args.append(f"--minio-path={config.minio_path}") + + return self._execute(args, timeout=timeout) + + def batch_remote( + self, + config: BatchRemoteConfig, + timeout: float | None = None, + jfr: JfrConfig | None = None, + stream_output: bool = False, + ) -> CLIResult: + """Dispatch a simulation to a remote target via MinIO staging. + + Args: + config: Batch remote configuration. + timeout: Timeout in seconds. + jfr: Optional JFR profiling configuration. + stream_output: Stream JAR output to terminal in real time. + + Returns: + CLIResult with execution details. 
+ """ + args = [ + "batchRemote", + f"--target={config.target}", + f"--minio-prefix={config.minio_prefix}", + ] + + if config.replicates > 1: + args.append(f"--replicates={config.replicates}") + if config.no_wait: + args.append("--no-wait") + if config.poll_interval is not None: + args.append(f"--poll-interval={config.poll_interval}") + if config.timeout is not None: + args.append(f"--timeout={config.timeout}") + if config.stage_from_local_dir is not None: + args.append( + f"--stage-from-local-dir={config.stage_from_local_dir.resolve()}" + ) + if config.require_prestaged: + args.append("--require-prestaged") + + args.append(config.simulation) + + return self._execute( + args, timeout=timeout, jfr=jfr, stream_output=stream_output, + ) + + def preprocess_batch( + self, + config: PreprocessBatchConfig, + timeout: float | None = None, + jfr: JfrConfig | None = None, + ) -> CLIResult: + """Preprocess geospatial data on a remote target. + + Args: + config: Preprocess-batch configuration. + timeout: Timeout in seconds. + jfr: Optional JFR profiling configuration. + + Returns: + CLIResult with execution details. 
+ """ + args = [ + "preprocessBatch", + str(config.script.resolve()), + config.simulation, + str(config.data_file.resolve()), + config.variable, + config.units, + str(config.output.resolve()), + f"--target={config.target}", + ] + + if config.crs is not None: + args.append(f"--crs={config.crs}") + if config.x_coord is not None: + args.append(f"--x-coord={config.x_coord}") + if config.y_coord is not None: + args.append(f"--y-coord={config.y_coord}") + if config.time_dim is not None: + args.append(f"--time-dim={config.time_dim}") + if config.timestep is not None: + args.append(f"--timestep={config.timestep}") + if config.default_value is not None: + args.append(f"--default-value={config.default_value}") + if config.parallel: + args.append("--parallel") + if config.amend: + args.append("--amend") + + return self._execute(args, timeout=timeout, jfr=jfr) + + def poll_batch( + self, + config: PollBatchConfig, + timeout: float | None = None, + ) -> CLIResult: + """Check the status of a dispatched batch job. + + Exit codes: + 0 — complete (job succeeded, stdout has JSON status) + 1 — error (job failed, stdout has JSON with error details) + 2 — running / pending + 100 — poll failure (transient error, caller should retry) + + Args: + config: Poll-batch configuration with job_id and target. + timeout: Timeout in seconds. + + Returns: + CLIResult with exit_code indicating job state. 
+ """ + args = [ + "pollBatch", + config.job_id, + f"--target={config.target}", + ] + + return self._execute(args, timeout=timeout) + def _execute_streaming( self, cmd: list[str], diff --git a/joshpy/jobs.py b/joshpy/jobs.py index 2484035..3070a9a 100644 --- a/joshpy/jobs.py +++ b/joshpy/jobs.py @@ -1336,6 +1336,57 @@ def to_run_remote_config( ) +def to_batch_remote_config( + job: ExpandedJob, + target: str, + minio_prefix: str, + *, + no_wait: bool = False, + poll_interval: int | None = None, + timeout: int | None = None, + require_prestaged: bool = True, +) -> BatchRemoteConfig: + """Convert an ExpandedJob to a BatchRemoteConfig for batch remote execution. + + Inputs must already be staged at ``minio_prefix`` (guarded by a + ``.josh-staged.json`` sentinel). Use :func:`assemble_batch_workdir` + + :func:`cli.stage_to_minio` before dispatching. + + Args: + job: The expanded job to convert. + target: Target profile name (e.g. ``"gke-test"``). + minio_prefix: Pre-staged MinIO prefix (e.g. ``sweeps//jobs//``). + no_wait: If True, dispatch and return immediately. + poll_interval: Polling interval in seconds (optional). + timeout: Job timeout in seconds (optional). + require_prestaged: If True (default), JAR will fail fast unless the + prefix sentinel reports ``complete``. + + Returns: + BatchRemoteConfig ready for use with JoshCLI.batch_remote(). + + Raises: + ValueError: If job.source_path is None. + """ + from joshpy.cli import BatchRemoteConfig + + if job.source_path is None: + raise ValueError( + "ExpandedJob.source_path is required for to_batch_remote_config()" + ) + + return BatchRemoteConfig( + simulation=job.simulation, + target=target, + minio_prefix=minio_prefix, + replicates=job.replicates, + no_wait=no_wait, + poll_interval=poll_interval, + timeout=timeout, + require_prestaged=require_prestaged, + ) + + @dataclass class SweepResult: """Results from running a parameter sweep. 
@@ -1625,6 +1676,12 @@ def run_sweep( remote: bool = False, api_key: str | None = None, endpoint: str | None = None, + batch_remote: bool = False, + target: str | None = None, + batch_no_wait: bool = False, + poll_interval: int = 10, + batch_timeout: int | None = None, + auto_ingest: bool = True, on_complete: Callable[[ExpandedJob, Any], None] | None = None, stop_on_failure: bool = True, dry_run: bool = False, @@ -1656,6 +1713,16 @@ def run_sweep( remote: If True, use run_remote() for cloud execution. api_key: Josh Cloud API key (required if remote=True). endpoint: Custom Josh Cloud endpoint URL. + batch_remote: If True, use batch_remote() for MinIO-staged execution. + Mutually exclusive with ``remote``. + target: Target profile name (required if batch_remote=True). + batch_no_wait: If True, dispatch all jobs with ``--no-wait`` then poll + for completion (async mode). If False (default), each job blocks + until the JAR finishes polling internally (blocking mode). + poll_interval: Seconds between poll attempts in async mode (default: 10). + batch_timeout: Overall timeout in seconds per job for async polling. + auto_ingest: If True (default), call ``ingest_results()`` after each + successful batch job to load CSVs from S3 into the registry. on_complete: Optional callback invoked after each job completes. Signature: callback(job, result) -> None. Called after registry recording (if enabled). Use for progress reporting, logging, etc. 
@@ -1722,6 +1789,10 @@ def run_sweep( if registry is not None and session_id is None: raise ValueError("session_id is required when registry is provided") + if batch_remote and remote: + raise ValueError("batch_remote and remote are mutually exclusive") + if batch_remote and target is None: + raise ValueError("target is required when batch_remote=True") # Setup registry callback if registry provided registry_callback: RegistryCallback | None = None @@ -1764,20 +1835,80 @@ def run_sweep( _bottled_success = False _bottle_collect: list[tuple[ExpandedJob, Any]] = [] + # Async batch remote tracking: job_id -> (job, dispatch_result) + _async_dispatched: dict[str, tuple[ExpandedJob, Any]] = {} + + # Per-sweep staging root for batch remote mode. One tempdir per sweep, one + # subdir per ExpandedJob. Cleaned up in the finally below. + sweep_workdir: Path | None = None + if batch_remote: + import tempfile + + sweep_workdir = Path( + tempfile.mkdtemp(prefix=f"joshpy-sweep-{session_id or 'adhoc'}-") + ) + try: for i, job in enumerate(job_set): if not quiet: - mode = "remote" if remote else "local" + mode = "batch-remote" if batch_remote else ("remote" if remote else "local") print(f"[{i + 1}/{total_jobs}] Running ({mode}): {job.parameters}") job_jfr = _per_job_jfr(jfr, job.run_hash) if jfr else None - if remote: + if batch_remote: + from joshpy.batch_orchestrator import assemble_batch_workdir + from joshpy.cli import StageToMinioConfig + + assert target is not None # validated above + assert sweep_workdir is not None # allocated when batch_remote=True + + job_dir = assemble_batch_workdir(job, sweep_workdir) + per_job_prefix = ( + f"sweeps/{session_id or 'adhoc'}/jobs/{job.run_hash}/" + ) + stage_result = cli.stage_to_minio( + StageToMinioConfig(input_dir=job_dir, prefix=per_job_prefix), + ) + if not stage_result.success: + result = stage_result + else: + br_config = to_batch_remote_config( + job, target, per_job_prefix, + no_wait=batch_no_wait, + poll_interval=poll_interval if 
batch_no_wait else None, + timeout=batch_timeout, + require_prestaged=True, + ) + result = cli.batch_remote( + br_config, jfr=job_jfr, stream_output=stream_output, + ) + elif remote: run_config = to_run_remote_config(job, api_key=api_key, endpoint=endpoint) result = cli.run_remote(run_config, jfr=job_jfr, stream_output=stream_output) else: run_config = to_run_config(job, enable_profiler=enable_profiler) result = cli.run(run_config, jfr=job_jfr, stream_output=stream_output) + # Async batch dispatch: parse job_id, defer result tracking + if batch_no_wait and batch_remote and result.success: + import json as _json + + try: + dispatch_data = _json.loads(result.stdout.strip()) + job_id = dispatch_data["jobId"] + _async_dispatched[job_id] = (job, result) + if not quiet: + print(f" [DISPATCHED] job_id={job_id}") + # Store batch metadata in registry + if registry_callback is not None: + run_id = registry_callback.record(job, result) + run_ids[job.run_hash] = run_id + continue # skip normal result handling + except (ValueError, KeyError) as e: + if not quiet: + print(f" [WARN] Could not parse --no-wait output: {e}") + # Fall through to normal result handling + job_results.append((job, result)) if result.success: @@ -1864,6 +1995,75 @@ def run_sweep( succeeded_before=succeeded, ) + # ----------------------------------------------------------- + # Async batch polling: wait for all dispatched jobs + # ----------------------------------------------------------- + if _async_dispatched: + import time + + from joshpy.cli import CLIResult, PollBatchConfig + + assert target is not None # validated above + remaining = set(_async_dispatched.keys()) + + if not quiet: + print(f"Polling {len(remaining)} dispatched jobs...") + + poll_start = time.monotonic() + while remaining: + if batch_timeout is not None: + elapsed = time.monotonic() - poll_start + if elapsed > batch_timeout: + for job_id in list(remaining): + job, _ = _async_dispatched[job_id] + timeout_result = CLIResult( + 
def _escape_sql_string(value: str) -> str:
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled per the SQL standard so values
    containing ``'`` cannot break out of the literal.
    """
    return "'" + value.replace("'", "''") + "'"


def configure_s3(
    conn: Any,
    endpoint: str,
    access_key: str,
    secret_key: str,
    url_style: str = "path",
    use_ssl: bool = True,
) -> None:
    """Configure a DuckDB connection for S3/MinIO access via httpfs.

    Installs and loads the httpfs extension, then creates an S3 secret
    so ``read_csv_auto('s3://bucket/key.csv')`` works transparently.

    Credential resolution is the caller's responsibility -- this function
    takes explicit values. ``ingest_results()`` resolves credentials from
    environment variables (``MINIO_ENDPOINT``, ``MINIO_ACCESS_KEY``,
    ``MINIO_SECRET_KEY``) before calling here.

    Args:
        conn: DuckDB connection object.
        endpoint: S3-compatible endpoint (e.g. ``"storage.googleapis.com"``).
        access_key: Access key / key ID.
        secret_key: Secret key.
        url_style: ``"path"`` (default, MinIO) or ``"vhost"`` (AWS).
        use_ssl: Use HTTPS (default True).
    """
    conn.execute("INSTALL httpfs; LOAD httpfs;")
    # CREATE SECRET is a DDL statement; DuckDB does not support
    # prepared-statement parameters ('?') in it, so the values must be
    # interpolated as escaped literals rather than bound as parameters.
    conn.execute(
        "CREATE OR REPLACE SECRET ("
        "TYPE s3, "
        f"KEY_ID {_escape_sql_string(access_key)}, "
        f"SECRET {_escape_sql_string(secret_key)}, "
        f"ENDPOINT {_escape_sql_string(endpoint)}, "
        f"URL_STYLE {_escape_sql_string(url_style)}, "
        f"USE_SSL {'true' if use_ssl else 'false'}"
        ")"
    )
Cleaned up in finally. + sweep_workdir: Path | None = None + if batch_remote: + import tempfile + + sweep_workdir = Path( + tempfile.mkdtemp(prefix=f"joshpy-adaptive-{session_id}-") + ) + try: for trial_num in range(n_trials): # 1. Ask Optuna for next params @@ -970,7 +986,34 @@ def run_adaptive_sweep( # 5. Execute CLI job_jfr = _per_job_jfr(jfr, job.run_hash) if jfr else None - if remote: + if batch_remote: + from joshpy.batch_orchestrator import assemble_batch_workdir + from joshpy.cli import StageToMinioConfig + from joshpy.jobs import to_batch_remote_config + + assert target is not None + assert sweep_workdir is not None + + job_dir = assemble_batch_workdir(job, sweep_workdir) + per_job_prefix = ( + f"sweeps/{session_id}/jobs/{job.run_hash}/" + ) + stage_result = cli.stage_to_minio( + StageToMinioConfig(input_dir=job_dir, prefix=per_job_prefix), + ) + if not stage_result.success: + result = stage_result + else: + br_config = to_batch_remote_config( + job, target, per_job_prefix, + no_wait=False, # adaptive sweeps always block + timeout=batch_timeout, + require_prestaged=True, + ) + result = cli.batch_remote( + br_config, jfr=job_jfr, stream_output=stream_output, + ) + elif remote: run_config = to_run_remote_config(job, api_key=api_key, endpoint=endpoint) result = cli.run_remote(run_config, jfr=job_jfr, stream_output=stream_output) else: @@ -1105,6 +1148,11 @@ def run_adaptive_sweep( # Set status to failed on exception registry.update_session_status(session_id, "failed") raise + finally: + if sweep_workdir is not None: + import shutil + + shutil.rmtree(sweep_workdir, ignore_errors=True) def _create_single_job( diff --git a/joshpy/sweep.py b/joshpy/sweep.py index 6d7eba7..e116d6f 100644 --- a/joshpy/sweep.py +++ b/joshpy/sweep.py @@ -37,6 +37,8 @@ from __future__ import annotations +import os +import tempfile import time from collections.abc import Callable from dataclasses import dataclass, field @@ -46,7 +48,7 @@ import pandas as pd from joshpy.cell_data import 
@dataclass
class _IngestMetadata:
    """Resolved metadata for an ingest operation."""

    run_hash: str
    config: Any
    simulation: str
    total_replicates: int
    label: str | None


def _resolve_ingest_metadata(
    registry: RunRegistry,
    label_or_hash: str,
    *,
    quiet: bool = False,
) -> _IngestMetadata:
    """Resolve a label or hash to the run metadata needed for ingestion.

    Raises:
        KeyError: If no config or session exists for the identifier.
    """
    run_hash = registry._resolve_label_or_hash(label_or_hash)
    config = registry.get_config_by_hash(run_hash)
    if config is None:
        raise KeyError(f"No config found for run hash: {run_hash}")

    session = registry.get_session(config.session_id)
    if session is None:
        raise KeyError(f"No session found: {config.session_id}")

    simulation = session.simulation

    # Determine replicate count: session metadata > job_runs count > fallback to 1
    total_replicates = session.total_replicates
    if not total_replicates:
        job_config = session.job_config
        if job_config is not None:
            total_replicates = getattr(job_config, "replicates", None)
    if not total_replicates:
        runs = registry.get_runs_for_hash(run_hash)
        total_replicates = len(runs) if runs else 1

    if not quiet:
        label_str = f" ({config.label})" if config.label else ""
        print(f"Ingesting results for {run_hash}{label_str}")
        print(f" Simulation: {simulation}, Replicates: {total_replicates}")

    return _IngestMetadata(
        run_hash=run_hash,
        config=config,
        simulation=simulation,
        total_replicates=total_replicates,
        label=config.label,
    )


def _get_josh_source(config: Any, run_hash: str) -> tuple[Path, str | None]:
    """Get josh source file on disk, creating a temp file if needed.

    Returns:
        ``(josh_path, temp_file_path_or_None)``. Caller must clean up
        the temp file when non-None.

    Raises:
        RuntimeError: If neither a path on disk nor stored content exists.
    """
    if config.josh_path and Path(config.josh_path).exists():
        return Path(config.josh_path), None

    if config.josh_content:
        fd, temp_path = tempfile.mkstemp(suffix=".josh")
        os.close(fd)  # mkstemp returns an open descriptor we don't need
        Path(temp_path).write_text(config.josh_content)
        return Path(temp_path), temp_path

    raise RuntimeError(
        f"Cannot inspect exports: no josh source available for {run_hash}. "
        "Neither josh_path exists on disk nor josh_content stored in registry."
    )


def _configure_minio_access(
    cli: JoshCLI,
    registry: RunRegistry,
    export_info: Any,
    path_template: str,
    *,
    download: bool,
    output_dir: Path | None,
    minio_bucket: str | None,
    quiet: bool,
) -> tuple[str, Path | None]:
    """Configure S3 direct read or download from MinIO.

    Args:
        cli: JoshCLI used for ``stageFromMinio`` when ``download=True``.
        registry: Registry whose DuckDB connection gets S3 credentials
            when reading directly.
        export_info: Parsed export descriptor; ``.host`` is the bucket.
        path_template: Export path template; its directory part becomes
            the MinIO prefix for downloads.
        download: If True, stage files locally instead of reading from S3.
        output_dir: Download destination (temp dir when None).
        minio_bucket: Optional bucket override.
        quiet: Suppress progress output.

    Returns:
        ``(bucket_name, download_dir_or_None)``.

    Raises:
        RuntimeError: If S3 env vars are missing, or staging fails.
    """
    bucket = minio_bucket or export_info.host

    if not download:
        endpoint = os.environ.get("MINIO_ENDPOINT", "")
        access_key = os.environ.get("MINIO_ACCESS_KEY", "")
        secret_key = os.environ.get("MINIO_SECRET_KEY", "")

        if not endpoint or not access_key or not secret_key:
            raise RuntimeError(
                "MINIO_ENDPOINT, MINIO_ACCESS_KEY, and MINIO_SECRET_KEY "
                "environment variables are required for S3 reads."
            )

        configure_s3(registry.conn, endpoint, access_key, secret_key)

        if not quiet:
            print(f" Reading directly from S3 (bucket: {bucket})")

        return bucket, None

    # download=True: stage files locally via stageFromMinio
    prefix = str(Path(path_template).parent).lstrip("/")
    if prefix == ".":
        # Path("file.csv").parent is "." -- a template with no directory
        # component must map to an empty prefix, not a literal "./".
        prefix = ""
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    dl_dir = Path(output_dir) if output_dir else Path(tempfile.mkdtemp(prefix="joshpy-ingest-"))
    dl_dir.mkdir(parents=True, exist_ok=True)

    if not quiet:
        print(f" Downloading from minio://{bucket}/{prefix} to {dl_dir}")

    stage_result = cli.stage_from_minio(
        StageFromMinioConfig(
            output_dir=dl_dir,
            prefix=prefix,
            minio_bucket=bucket,
        )
    )
    if not stage_result.success:
        raise RuntimeError(
            f"stageFromMinio failed (exit {stage_result.exit_code}): "
            f"{stage_result.stderr}"
        )

    return bucket, dl_dir


def _load_ingest_replicates(
    registry: RunRegistry,
    export_paths: ExportPaths,
    path_template: str,
    *,
    is_minio: bool,
    download: bool,
    bucket: str,
    dl_dir: Path | None,
    meta: _IngestMetadata,
    run_id: str,
    export_type: str,
    quiet: bool,
) -> int:
    """Load CSVs for each replicate into the registry.

    Missing CSVs (local or S3) are skipped gracefully so partially-failed
    sweeps can still be ingested.

    Returns:
        Total number of rows loaded across all replicates.
    """
    loader = CellDataLoader(registry)
    total_rows = 0
    loaded = 0
    skipped = 0

    template_vars_base: dict[str, Any] = {"simulation": meta.simulation}
    if meta.config.parameters:
        template_vars_base.update(meta.config.parameters)
    if meta.label:
        template_vars_base["label"] = meta.label

    for rep in range(meta.total_replicates):
        template_vars = {**template_vars_base, "replicate": rep}

        try:
            resolved = export_paths.resolve_path(path_template, **template_vars)
        except KeyError:
            if not quiet:
                print(f" Replicate {rep}: template variable missing, skipping")
            skipped += 1
            continue

        # Determine the actual path/URL to load
        if is_minio and not download:
            resolved_path = resolved.as_posix().lstrip("/")
            csv_target: Path | str = f"s3://{bucket}/{resolved_path}"
        elif is_minio and download:
            # NOTE(review): assumes stageFromMinio flattens staged objects
            # into dl_dir by file name -- confirm against
            # JoshCLI.stage_from_minio's output layout.
            csv_target = dl_dir / resolved.name
        else:
            csv_target = resolved

        # Try loading -- skip gracefully if missing
        try:
            rows = loader.load_csv(
                csv_path=csv_target,
                run_id=run_id,
                run_hash=meta.run_hash,
                entity_type=export_type,
            )
            total_rows += rows
            loaded += 1
            if not quiet:
                print(f" Replicate {rep}: {rows:,} rows loaded")
        except FileNotFoundError:
            skipped += 1
            if not quiet:
                print(f" Replicate {rep}: not found, skipping")
        except Exception as e:
            skipped += 1
            if not quiet:
                err_str = str(e)
                # DuckDB raises IOException for missing S3 objects
                if "HTTP 404" in err_str or "NoSuchKey" in err_str:
                    print(f" Replicate {rep}: not found in S3, skipping")
                else:
                    print(f" Replicate {rep}: error loading: {e}")

    if not quiet:
        print(f"\nDone: {total_rows:,} rows loaded ({loaded} replicates, {skipped} skipped)")

    return total_rows


def ingest_results(
    cli: JoshCLI,
    registry: RunRegistry,
    label_or_hash: str,
    *,
    export_type: str = "patch",
    download: bool = False,
    output_dir: Path | None = None,
    minio_bucket: str | None = None,
    quiet: bool = False,
) -> int:
    """Recover and ingest results into the registry by label or run hash.

    Looks up the run in the registry, discovers export paths via
    ``inspect_exports``, and loads CSVs into the ``cell_data`` table.

    For ``minio://`` export paths the default behaviour reads CSVs directly
    from S3 into DuckDB via ``httpfs`` (no local download). Set
    ``download=True`` to download via ``stageFromMinio`` first.

    Missing CSVs (e.g. from an OOM'd replicate) are skipped gracefully.

    Args:
        cli: JoshCLI instance.
        registry: RunRegistry where results will be loaded.
        label_or_hash: Human-readable label or 12-char run hash.
        export_type: Type of export to load (``"patch"``, ``"meta"``, ``"entity"``).
        download: If True, download CSVs locally via ``stageFromMinio``
            instead of reading directly from S3.
        output_dir: Local directory for downloads (temp dir if None).
            Only used when ``download=True``.
        minio_bucket: Override the MinIO bucket (default: parsed from
            the ``minio://`` export path).
        quiet: Suppress progress output.

    Returns:
        Total number of rows loaded.

    Raises:
        KeyError: If label/hash not found in registry.
        RuntimeError: If no export path configured for *export_type*, or
            if ``inspect_exports`` fails.

    Examples:
        >>> # Recover results for a labeled run (reads from S3)
        >>> rows = ingest_results(cli, registry, "my-label")

        >>> # Download locally first, then load
        >>> rows = ingest_results(cli, registry, "my-label", download=True)
    """
    meta = _resolve_ingest_metadata(registry, label_or_hash, quiet=quiet)
    josh_path, temp_josh = _get_josh_source(meta.config, meta.run_hash)

    try:
        export_paths = cli.inspect_exports(
            InspectExportsConfig(script=josh_path, simulation=meta.simulation)
        )

        export_info = export_paths.export_files.get(export_type)
        if export_info is None:
            raise RuntimeError(
                f"No {export_type} export configured in {josh_path}. "
                f"Check that exportFiles.{export_type} is set in your simulation."
            )

        path_template = export_info.path
        is_minio = export_info.protocol == "minio"

        if not quiet:
            proto = f"minio://{export_info.host}" if is_minio else "local"
            print(f" Export path: {proto}{path_template}")

        bucket: str = ""
        dl_dir: Path | None = None
        if is_minio:
            bucket, dl_dir = _configure_minio_access(
                cli, registry, export_info, path_template,
                download=download, output_dir=output_dir,
                minio_bucket=minio_bucket, quiet=quiet,
            )

        run_id = registry._resolve_run_id_for_hash(meta.run_hash)

        return _load_ingest_replicates(
            registry, export_paths, path_template,
            is_minio=is_minio, download=download, bucket=bucket,
            dl_dir=dl_dir, meta=meta, run_id=run_id,
            export_type=export_type, quiet=quiet,
        )
    finally:
        # Always remove the temp .josh file created from stored content.
        if temp_josh:
            Path(temp_josh).unlink(missing_ok=True)
download: bool = False, + output_dir: Path | None = None, + minio_bucket: str | None = None, + quiet: bool = False, + ) -> int: + """Recover and ingest results from MinIO (or local) by label. + + Uses ``ingest_results()`` to look up the run by label, discover + export paths, and load CSVs into the registry. Unlike + ``load_results()`` this does not require a prior ``run()`` call -- + it works from the registry alone. + + Args: + export_type: Type of export to load ("patch", "meta", "entity"). + download: If True, download CSVs locally instead of S3 direct read. + output_dir: Download destination (only used with download=True). + minio_bucket: Override MinIO bucket name. + quiet: Suppress progress output. + + Returns: + Total number of rows loaded. + + Examples: + >>> manager.ingest() # reads directly from S3 + >>> manager.ingest(download=True, output_dir=Path("./local")) + """ + label = self._label if hasattr(self, "_label") and self._label else None + identifier = label or self.job_set.jobs[0].run_hash + return ingest_results( + cli=self.cli, + registry=self.registry, + label_or_hash=identifier, + export_type=export_type, + download=download, + output_dir=output_dir, + minio_bucket=minio_bucket, + quiet=quiet, + ) + def query( self, variable: str, diff --git a/joshpy/targets.py b/joshpy/targets.py new file mode 100644 index 0000000..3fd19a5 --- /dev/null +++ b/joshpy/targets.py @@ -0,0 +1,350 @@ +"""Target profile system for batch remote execution. + +Reads and writes ``~/.josh/targets/.json`` — the shared config format +between josh (Java) and joshpy (Python). Each profile defines a deployment +target (HTTP Cloud Run or Kubernetes) with connection info, MinIO/S3 +credentials, and resource settings. 
+ +Example usage:: + + from joshpy.targets import ( + TargetProfile, HttpTargetConfig, KubernetesTargetConfig, + save_target, load_target, list_targets, resolve_minio_creds, + ) + + # Create and save an HTTP target + profile = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://josh.example.com"), + minio_endpoint="https://storage.googleapis.com", + minio_bucket="josh-results", + ) + save_target("cloud-dev", profile) + + # Load and resolve credentials + loaded = load_target("cloud-dev") + creds = resolve_minio_creds(loaded) +""" + +from __future__ import annotations + +import json +import os +import re +from dataclasses import dataclass, field, fields +from pathlib import Path +from typing import Any, Literal + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +TARGETS_DIR: Path = Path.home() / ".josh" / "targets" + +_VALID_NAME = re.compile(r"^[a-zA-Z0-9_-]+$") + +TargetType = Literal["http", "kubernetes"] + +# Python field name -> JSON key (only where they differ) +_TO_JSON: dict[str, str] = { + "target_type": "type", + "api_key": "apiKey", + "timeout_seconds": "timeoutSeconds", + "ttl_seconds_after_finished": "ttlSecondsAfterFinished", +} +_FROM_JSON: dict[str, str] = {v: k for k, v in _TO_JSON.items()} + +# --------------------------------------------------------------------------- +# Dataclasses +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class HttpTargetConfig: + """HTTP target configuration (Cloud Run / standalone server). + + Attributes: + endpoint: Server URL (required). + api_key: Optional API key for authentication. + """ + + endpoint: str + api_key: str | None = None + + +@dataclass(frozen=True) +class KubernetesTargetConfig: + """Kubernetes target configuration (GKE / any k8s cluster). 
+ + Attributes: + namespace: Kubernetes namespace for jobs (required). + image: Container image for simulation pods (required). + context: kubectl context name (None = current context). + pod_minio_endpoint: In-cluster MinIO endpoint pods use (may differ + from the host-side ``minio_endpoint`` on TargetProfile). + resources: K8s resource spec, e.g. + ``{"requests": {"cpu": "1", "memory": "2Gi"}, + "limits": {"memory": "4Gi"}}``. + parallelism: Max concurrent pods per job. + timeout_seconds: Job timeout in seconds. + ttl_seconds_after_finished: Auto-cleanup delay after job completes. + spot: Use preemptible / spot nodes. + """ + + namespace: str + image: str + context: str | None = None + pod_minio_endpoint: str | None = None + resources: dict[str, Any] = field(default_factory=dict) + parallelism: int | None = None + timeout_seconds: int | None = None + ttl_seconds_after_finished: int | None = None + spot: bool = False + + +@dataclass(frozen=True) +class TargetProfile: + """Top-level target profile container. + + Attributes: + target_type: ``"http"`` or ``"kubernetes"``. + http: HTTP config (required when target_type is ``"http"``). + kubernetes: K8s config (required when target_type is ``"kubernetes"``). + minio_endpoint: Host-side MinIO/S3 endpoint for staging. + minio_access_key: MinIO access key (prefer env vars for secrets). + minio_secret_key: MinIO secret key (prefer env vars for secrets). + minio_bucket: MinIO/GCS bucket name. 
+ """ + + target_type: TargetType + http: HttpTargetConfig | None = None + kubernetes: KubernetesTargetConfig | None = None + minio_endpoint: str | None = None + minio_access_key: str | None = None + minio_secret_key: str | None = None + minio_bucket: str | None = None + + def __post_init__(self) -> None: + if self.target_type == "http" and self.http is None: + raise ValueError( + "http config required when target_type='http'" + ) + if self.target_type == "kubernetes" and self.kubernetes is None: + raise ValueError( + "kubernetes config required when target_type='kubernetes'" + ) + + +@dataclass(frozen=True) +class ResolvedMinioCreds: + """Fully resolved MinIO credentials (profile + env vars merged). + + Attributes: + endpoint: MinIO/S3 endpoint URL. + access_key: Access key. + secret_key: Secret key. + bucket: Bucket name. + """ + + endpoint: str | None = None + access_key: str | None = None + secret_key: str | None = None + bucket: str | None = None + + +# --------------------------------------------------------------------------- +# Serialization helpers +# --------------------------------------------------------------------------- + + +def _to_json_dict(obj: Any) -> dict[str, Any]: + """Convert a dataclass instance to a JSON-compatible dict. + + Renames fields via ``_TO_JSON``, omits ``None`` values, and preserves + nested dicts (like ``resources``) as-is. 
+ """ + result: dict[str, Any] = {} + for f in fields(obj): + value = getattr(obj, f.name) + if value is None: + continue + # Skip false booleans only when they match the field default + if isinstance(value, bool) and not value and f.default is False: + continue + # Skip empty dicts when that is the default + if isinstance(value, dict) and not value: + continue + key = _TO_JSON.get(f.name, f.name) + result[key] = value + return result + + +def _from_json_dict(d: dict[str, Any]) -> dict[str, Any]: + """Rename JSON keys to Python field names via ``_FROM_JSON``.""" + return {_FROM_JSON.get(k, k): v for k, v in d.items()} + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + + +def _profile_path(name: str) -> Path: + """Validate *name* and return ``TARGETS_DIR / f"{name}.json"``. + + Raises: + ValueError: If *name* contains characters outside ``[a-zA-Z0-9_-]``. + """ + if not _VALID_NAME.match(name): + raise ValueError( + f"Invalid target name {name!r}: " + "must match [a-zA-Z0-9_-]+ (no dots, slashes, or spaces)." + ) + return TARGETS_DIR / f"{name}.json" + + +# --------------------------------------------------------------------------- +# CRUD +# --------------------------------------------------------------------------- + + +def save_target(name: str, profile: TargetProfile) -> Path: + """Write a target profile to ``~/.josh/targets/.json``. + + Creates the directory if it does not exist. + + Args: + name: Profile name (alphanumeric, hyphens, underscores). + profile: Target profile to save. + + Returns: + Path to the written JSON file. 
+ """ + path = _profile_path(name) + path.parent.mkdir(parents=True, exist_ok=True) + + data: dict[str, Any] = {"type": profile.target_type} + + if profile.http is not None: + data["http"] = _to_json_dict(profile.http) + if profile.kubernetes is not None: + data["kubernetes"] = _to_json_dict(profile.kubernetes) + + # Top-level MinIO fields (snake_case, matching Java format) + for minio_field in ("minio_endpoint", "minio_access_key", + "minio_secret_key", "minio_bucket"): + value = getattr(profile, minio_field) + if value is not None: + data[minio_field] = value + + path.write_text(json.dumps(data, indent=2) + "\n") + return path + + +def load_target(name: str) -> TargetProfile: + """Read a target profile from ``~/.josh/targets/.json``. + + Args: + name: Profile name. + + Returns: + Parsed :class:`TargetProfile`. + + Raises: + FileNotFoundError: If profile does not exist. + ValueError: If JSON is malformed or missing required fields. + """ + path = _profile_path(name) + raw = json.loads(path.read_text()) + + target_type = raw.get("type") + if target_type not in ("http", "kubernetes"): + raise ValueError( + f"Invalid or missing 'type' in {path}: got {target_type!r}, " + "expected 'http' or 'kubernetes'." + ) + + http_config = None + k8s_config = None + + if target_type == "http" and "http" in raw: + http_config = HttpTargetConfig(**_from_json_dict(raw["http"])) + elif target_type == "kubernetes" and "kubernetes" in raw: + k8s_config = KubernetesTargetConfig(**_from_json_dict(raw["kubernetes"])) + + return TargetProfile( + target_type=target_type, + http=http_config, + kubernetes=k8s_config, + minio_endpoint=raw.get("minio_endpoint"), + minio_access_key=raw.get("minio_access_key"), + minio_secret_key=raw.get("minio_secret_key"), + minio_bucket=raw.get("minio_bucket"), + ) + + +def list_targets() -> list[str]: + """List available target profile names. + + Returns: + Sorted list of profile names (without ``.json`` extension). 
+ Empty list if ``~/.josh/targets/`` does not exist. + """ + if not TARGETS_DIR.is_dir(): + return [] + return sorted(p.stem for p in TARGETS_DIR.glob("*.json")) + + +def delete_target(name: str) -> None: + """Remove a target profile. + + Args: + name: Profile name. + + Raises: + FileNotFoundError: If profile does not exist. + """ + _profile_path(name).unlink() + + +# --------------------------------------------------------------------------- +# Credential resolution +# --------------------------------------------------------------------------- + + +def resolve_minio_creds( + target: TargetProfile | None = None, +) -> ResolvedMinioCreds: + """Resolve MinIO credentials from profile + environment variables. + + Hierarchy (per field): profile JSON field > environment variable. + + Environment variables checked: + ``MINIO_ENDPOINT``, ``MINIO_ACCESS_KEY``, ``MINIO_SECRET_KEY``, + ``MINIO_BUCKET``. + + Args: + target: Optional target profile with MinIO fields. + + Returns: + :class:`ResolvedMinioCreds` with merged credentials. Fields may + still be ``None`` if neither source provides a value. 
+ """ + return ResolvedMinioCreds( + endpoint=( + (target.minio_endpoint if target else None) + or os.environ.get("MINIO_ENDPOINT") + ), + access_key=( + (target.minio_access_key if target else None) + or os.environ.get("MINIO_ACCESS_KEY") + ), + secret_key=( + (target.minio_secret_key if target else None) + or os.environ.get("MINIO_SECRET_KEY") + ), + bucket=( + (target.minio_bucket if target else None) + or os.environ.get("MINIO_BUCKET") + ), + ) diff --git a/pixi.toml b/pixi.toml index 3c29df6..db36599 100644 --- a/pixi.toml +++ b/pixi.toml @@ -49,7 +49,8 @@ dev = { features = ["dev"], solve-group = "default" } [tasks] install = "pip install -e '.[full]' --quiet" install-dev = "pip install -e '.[dev]' --quiet" -test = { cmd = "pytest tests/ -v", depends-on = ["install-dev"] } +test = { cmd = "pytest tests/ -v -m 'not integration'", depends-on = ["install-dev"] } +test-integration = { cmd = "pytest tests/ -v -m integration", depends-on = ["install-dev"] } lint = "ruff check joshpy/" typecheck = "mypy joshpy/" format = "ruff format joshpy/" diff --git a/pyproject.toml b/pyproject.toml index a06f90d..6bce7d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,9 @@ testpaths = ["tests"] python_files = ["test_*.py"] python_functions = ["test_*"] addopts = "-v --tb=short" +markers = [ + "integration: marks tests requiring external services (MinIO)", +] [tool.mypy] python_version = "3.10" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..3daf8d7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,186 @@ +"""Shared fixtures and configuration for joshpy tests.""" + +from __future__ import annotations + +import tempfile +from pathlib import Path + +import pytest + +# --------------------------------------------------------------------------- +# Pytest marker registration +# --------------------------------------------------------------------------- + + +def pytest_configure(config): + config.addinivalue_line( + "markers", + 
"integration: marks tests requiring external services (MinIO)", + ) + + +# --------------------------------------------------------------------------- +# MinIO integration test constants (bitnami test image defaults) +# --------------------------------------------------------------------------- + +MINIO_ENDPOINT = "localhost:9000" +MINIO_ACCESS_KEY = "minioadmin" +MINIO_SECRET_KEY = "minioadmin" +TEST_BUCKET = "josh-test-bucket" + + +# --------------------------------------------------------------------------- +# Session-scoped guards — skip the entire suite when infra is missing +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def minio_available(): + """Skip if the MinIO test container is not reachable.""" + import requests + + try: + resp = requests.get( + f"http://{MINIO_ENDPOINT}/minio/health/ready", timeout=3 + ) + if resp.status_code != 200: + pytest.skip(f"MinIO not ready (HTTP {resp.status_code})") + except requests.ConnectionError: + pytest.skip("MinIO not available at localhost:9000") + + +@pytest.fixture(scope="session") +def jar_available(): + """Skip if the Josh JAR has not been downloaded.""" + from joshpy.jar import JarManager, JarMode + + manager = JarManager() + try: + manager.get_jar(JarMode.DEV, auto_download=False) + except FileNotFoundError: + pytest.skip( + "Josh JAR not found — run `pixi run get-jars` first" + ) + + +# --------------------------------------------------------------------------- +# Bucket name +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def test_bucket(): + return TEST_BUCKET + + +# --------------------------------------------------------------------------- +# DuckDB connection with S3 configured for the test MinIO +# --------------------------------------------------------------------------- + + +@pytest.fixture +def minio_conn(minio_available): + """Fresh DuckDB connection with httpfs 
configured for test MinIO.""" + import duckdb + from joshpy.registry import configure_s3 + + conn = duckdb.connect(":memory:") + configure_s3( + conn, + endpoint=MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + use_ssl=False, + ) + yield conn + conn.close() + + +# --------------------------------------------------------------------------- +# RunRegistry with S3 pre-configured +# --------------------------------------------------------------------------- + + +@pytest.fixture +def minio_registry(minio_available): + """In-memory RunRegistry whose DuckDB connection can read S3.""" + from joshpy.registry import RunRegistry, configure_s3 + + registry = RunRegistry(":memory:") + configure_s3( + registry.conn, + endpoint=MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + use_ssl=False, + ) + yield registry + registry.close() + + +# --------------------------------------------------------------------------- +# CSV seeding helper — writes to MinIO via DuckDB COPY +# --------------------------------------------------------------------------- + + +@pytest.fixture +def seed_csv(minio_conn, test_bucket): + """Return a callable that writes CSV content to MinIO. 
+ + Usage:: + + url = seed_csv("level1/test.csv", "step,replicate,val\\n0,0,1.0\\n") + """ + cleanup: list[str] = [] + + def _seed(key: str, csv_content: str) -> str: + s3_url = f"s3://{test_bucket}/{key}" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".csv", delete=False + ) as f: + f.write(csv_content) + local_path = f.name + try: + minio_conn.execute( + f"COPY (SELECT * FROM read_csv_auto('{local_path}')) " + f"TO '{s3_url}' (FORMAT CSV, HEADER)" + ) + finally: + Path(local_path).unlink(missing_ok=True) + cleanup.append(s3_url) + return s3_url + + yield _seed + + +# --------------------------------------------------------------------------- +# Real JoshCLI (session-scoped — JAR doesn't change) +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def josh_cli(jar_available): + """JoshCLI backed by the real downloaded JAR.""" + from joshpy.cli import JoshCLI + from joshpy.jar import JarMode + + return JoshCLI(josh_jar=JarMode.DEV) + + +# --------------------------------------------------------------------------- +# Monkeypatch for configure_s3 → use_ssl=False +# (needed by ingest_results which calls configure_s3 without use_ssl kwarg) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def patch_s3_no_ssl(monkeypatch): + """Patch configure_s3 in the sweep module so it uses use_ssl=False.""" + from joshpy.registry import configure_s3 as real_configure_s3 + + def _no_ssl(conn, endpoint, access_key, secret_key, **kwargs): + real_configure_s3( + conn, endpoint, access_key, secret_key, use_ssl=False + ) + + monkeypatch.setattr("joshpy.sweep.configure_s3", _no_ssl) diff --git a/tests/fixtures/e2e_batch.josh b/tests/fixtures/e2e_batch.josh new file mode 100644 index 0000000..eea36ff --- /dev/null +++ b/tests/fixtures/e2e_batch.josh @@ -0,0 +1,42 @@ +# E2E batch remote test simulation. +# Tiny grid, 5 timesteps — completes in seconds on GKE. 
+# Writes CSV results to GCS via minio:// protocol. + +start simulation Main + + grid.size = 1000 m + grid.low = 33.7 degrees latitude, -115.4 degrees longitude + grid.high = 34.0 degrees latitude, -116.4 degrees longitude + grid.patch = "Default" + + steps.low = 0 count + steps.high = 5 count + + exportFiles.patch = "minio://josh-batch-storage/e2e-test/pr5_output_{replicate}.csv" + +end simulation + +start patch Default + + ForeverTree.init = create 5 count of ForeverTree + + export.treeCount.step = count(ForeverTree) + export.averageHeight.step = mean(ForeverTree.height) + +end patch + +start organism ForeverTree + + age.init = 0 year + age.step = prior.age + 1 year + + height.init = 0 meters + height.step = prior.height + sample uniform from 0 meters to 1 meters + +end organism + +start unit year + + alias years + +end unit diff --git a/tests/fixtures/minio_export.josh b/tests/fixtures/minio_export.josh new file mode 100644 index 0000000..6ae6c27 --- /dev/null +++ b/tests/fixtures/minio_export.josh @@ -0,0 +1,42 @@ +# Minimal simulation for MinIO integration tests. +# Writes CSV results to minio://josh-test-bucket/results/output_{replicate}.csv +# Tiny grid, 5 timesteps — completes in seconds. 
+ +start simulation Main + + grid.size = 1000 m + grid.low = 33.7 degrees latitude, -115.4 degrees longitude + grid.high = 34.0 degrees latitude, -116.4 degrees longitude + grid.patch = "Default" + + steps.low = 0 count + steps.high = 5 count + + exportFiles.patch = "minio://josh-test-bucket/results/output_{replicate}.csv" + +end simulation + +start patch Default + + ForeverTree.init = create 5 count of ForeverTree + + export.treeCount.step = count(ForeverTree) + export.averageHeight.step = mean(ForeverTree.height) + +end patch + +start organism ForeverTree + + age.init = 0 year + age.step = prior.age + 1 year + + height.init = 0 meters + height.step = prior.height + sample uniform from 0 meters to 1 meters + +end organism + +start unit year + + alias years + +end unit diff --git a/tests/test_batch_orchestrator.py b/tests/test_batch_orchestrator.py new file mode 100644 index 0000000..fe85ca8 --- /dev/null +++ b/tests/test_batch_orchestrator.py @@ -0,0 +1,166 @@ +"""Tests for joshpy.batch_orchestrator.assemble_batch_workdir().""" + +import tempfile +import unittest +from pathlib import Path + +from joshpy.batch_orchestrator import assemble_batch_workdir +from joshpy.jobs import ExpandedJob + + +def _make_job( + tmp: Path, + *, + run_hash: str = "abc123def456", + file_mappings: dict[str, Path] | None = None, + source_path: Path | None = None, +) -> ExpandedJob: + """Build a minimal ExpandedJob for orchestrator tests.""" + if source_path is None: + source_path = tmp / "model.josh" + source_path.write_text("start simulation Main\nend simulation\n") + + config_path = tmp / "config.jshc" + config_path.write_text("rendered_jshc_placeholder") + + return ExpandedJob( + config_content="rendered_jshc_placeholder", + config_path=config_path, + config_name="sweep", + run_hash=run_hash, + parameters={"p": 1}, + simulation="Main", + replicates=1, + source_path=source_path, + file_mappings=file_mappings or {}, + ) + + +class TestAssembleBatchWorkdir(unittest.TestCase): + def 
test_creates_per_run_hash_subdir(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + job = _make_job(tmp) + + result = assemble_batch_workdir(job, workdir) + + self.assertEqual(result, workdir / job.run_hash) + self.assertTrue(result.is_dir()) + + def test_sim_josh_symlink_targets_source_path(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + job = _make_job(tmp) + + target = assemble_batch_workdir(job, workdir) + + sim_link = target / "sim.josh" + self.assertTrue(sim_link.is_symlink()) + self.assertEqual(sim_link.resolve(), job.source_path.resolve()) + + def test_config_jshc_written_from_config_content(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + job = _make_job(tmp) + + target = assemble_batch_workdir(job, workdir) + + jshc = target / "config.jshc" + self.assertTrue(jshc.exists()) + self.assertFalse(jshc.is_symlink()) + self.assertEqual(jshc.read_text(), "rendered_jshc_placeholder") + + def test_file_mappings_become_symlinks_with_jshd_suffix(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + + climate = tmp / "climate.jshd" + climate.write_bytes(b"CLIMATE_DATA") + cover = tmp / "cover_raw" # no suffix; should get .jshd appended + cover.write_bytes(b"COVER_DATA") + + job = _make_job( + tmp, + file_mappings={ + "climate": climate, # no ext in mapping key, has suffix in dest + "cover": cover, + }, + ) + + target = assemble_batch_workdir(job, workdir) + + self.assertTrue((target / "climate.jshd").is_symlink()) + self.assertEqual( + (target / "climate.jshd").resolve(), climate.resolve(), + ) + self.assertTrue((target / "cover.jshd").is_symlink()) + self.assertEqual( + (target / "cover.jshd").resolve(), cover.resolve(), + ) + + def 
test_file_mapping_key_with_jshd_suffix_is_not_doubled(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + + data = tmp / "temp.jshd" + data.write_bytes(b"TEMP") + + job = _make_job(tmp, file_mappings={"temp.jshd": data}) + target = assemble_batch_workdir(job, workdir) + + self.assertTrue((target / "temp.jshd").is_symlink()) + self.assertFalse((target / "temp.jshd.jshd").exists()) + + def test_raises_when_source_path_none(self): + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + + job = ExpandedJob( + config_content="x", + config_path=tmp / "c.jshc", + config_name="sweep", + run_hash="abc", + parameters={}, + simulation="Main", + replicates=1, + source_path=None, + ) + with self.assertRaises(ValueError): + assemble_batch_workdir(job, workdir) + + def test_idempotent(self): + """Re-running replaces existing entries without erroring.""" + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + workdir = tmp / "work" + workdir.mkdir() + + data = tmp / "a.jshd" + data.write_bytes(b"v1") + job = _make_job(tmp, file_mappings={"a": data}) + + assemble_batch_workdir(job, workdir) + # Second call must not raise + target = assemble_batch_workdir(job, workdir) + + self.assertTrue((target / "sim.josh").is_symlink()) + self.assertTrue((target / "a.jshd").is_symlink()) + self.assertEqual( + (target / "config.jshc").read_text(), "rendered_jshc_placeholder", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cli.py b/tests/test_cli.py index 7fc84a7..3cc4963 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -446,9 +446,9 @@ def test_run_with_data_files(self, mock_run): cmd = mock_run.call_args[0][0] self.assertIn("--data", cmd) - # Find the data value + # Find the data value — name gets extension appended when missing data_idx = cmd.index("--data") - self.assertIn("editor=", cmd[data_idx + 1]) + 
self.assertIn("editor.jshc=", cmd[data_idx + 1]) @patch("subprocess.run") def test_run_with_custom_tags(self, mock_run): @@ -1446,5 +1446,681 @@ def test_stream_output_run_remote(self, mock_popen): self.assertIn("remote step", result.stdout) +class TestStageFromMinioConfig(unittest.TestCase): + """Tests for StageFromMinioConfig.""" + + def test_defaults(self): + from joshpy.cli import StageFromMinioConfig + + config = StageFromMinioConfig( + output_dir=Path("/tmp/out"), + prefix="batch-jobs/abc/inputs/", + ) + self.assertEqual(config.output_dir, Path("/tmp/out")) + self.assertEqual(config.prefix, "batch-jobs/abc/inputs/") + self.assertIsNone(config.minio_endpoint) + self.assertIsNone(config.minio_access_key) + self.assertIsNone(config.minio_secret_key) + self.assertIsNone(config.minio_bucket) + + def test_frozen(self): + from joshpy.cli import StageFromMinioConfig + + config = StageFromMinioConfig(output_dir=Path("/tmp"), prefix="p/") + with self.assertRaises(AttributeError): + config.prefix = "other/" + + +class TestStageFromMinio(unittest.TestCase): + """Tests for JoshCLI.stage_from_minio().""" + + JAR_MODE = JarMode.LOCAL + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_basic_args(self, mock_run, _mock_jar): + """stage_from_minio() should build correct CLI args.""" + from joshpy.cli import StageFromMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = StageFromMinioConfig( + output_dir=Path("/tmp/out"), + prefix="batch-jobs/abc/inputs/", + ) + cli.stage_from_minio(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("stageFromMinio", cmd) + self.assertIn("--output-dir", cmd) + self.assertIn("--prefix", cmd) + prefix_idx = cmd.index("--prefix") + self.assertEqual(cmd[prefix_idx + 1], "batch-jobs/abc/inputs/") + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + 
@patch("subprocess.run") + def test_minio_flags_only_when_set(self, mock_run, _mock_jar): + """Only non-None minio flags should be passed.""" + from joshpy.cli import StageFromMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + + # With no minio flags + config_no_minio = StageFromMinioConfig( + output_dir=Path("/tmp/out"), prefix="p/" + ) + cli.stage_from_minio(config_no_minio) + cmd = mock_run.call_args[0][0] + self.assertNotIn("--minio-endpoint", cmd) + self.assertNotIn("--minio-access-key", cmd) + self.assertNotIn("--minio-secret-key", cmd) + self.assertNotIn("--minio-bucket", cmd) + + # With all minio flags + config_with_minio = StageFromMinioConfig( + output_dir=Path("/tmp/out"), + prefix="p/", + minio_endpoint="https://storage.example.com", + minio_access_key="AKID", + minio_secret_key="SECRET", + minio_bucket="my-bucket", + ) + cli.stage_from_minio(config_with_minio) + cmd = mock_run.call_args[0][0] + self.assertIn("--minio-endpoint", cmd) + self.assertIn("--minio-access-key", cmd) + self.assertIn("--minio-secret-key", cmd) + self.assertIn("--minio-bucket", cmd) + ep_idx = cmd.index("--minio-endpoint") + self.assertEqual(cmd[ep_idx + 1], "https://storage.example.com") + bucket_idx = cmd.index("--minio-bucket") + self.assertEqual(cmd[bucket_idx + 1], "my-bucket") + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_extra_flags(self, mock_run, _mock_jar): + """config_file, ensure_bucket_exists, minio_path only emitted when set.""" + from joshpy.cli import StageFromMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + cli = JoshCLI(josh_jar=self.JAR_MODE) + + cli.stage_from_minio( + StageFromMinioConfig( + output_dir=Path("/tmp/out"), + prefix="p/", + config_file=Path("/etc/josh.json"), + ensure_bucket_exists=True, + minio_path="models/v1/", + ), + ) + cmd = 
mock_run.call_args[0][0] + self.assertTrue(any(c.startswith("--config-file=") for c in cmd)) + self.assertIn("--ensure-bucket-exists", cmd) + self.assertIn("--minio-path=models/v1/", cmd) + + +class TestStageToMinio(unittest.TestCase): + """Tests for JoshCLI.stage_to_minio().""" + + JAR_MODE = JarMode.LOCAL + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_basic_args(self, mock_run, _mock_jar): + """stage_to_minio() should build correct CLI args.""" + from joshpy.cli import StageToMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = StageToMinioConfig( + input_dir=Path("/tmp/inputs"), + prefix="batch-jobs/abc/inputs/", + ) + cli.stage_to_minio(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("stageToMinio", cmd) + self.assertIn("--input-dir", cmd) + self.assertIn("--prefix", cmd) + prefix_idx = cmd.index("--prefix") + self.assertEqual(cmd[prefix_idx + 1], "batch-jobs/abc/inputs/") + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_minio_flags_only_when_set(self, mock_run, _mock_jar): + """Only non-None minio flags should be passed.""" + from joshpy.cli import StageToMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + + # With no minio flags + config_no_minio = StageToMinioConfig( + input_dir=Path("/tmp/inputs"), prefix="p/" + ) + cli.stage_to_minio(config_no_minio) + cmd = mock_run.call_args[0][0] + self.assertNotIn("--minio-endpoint", cmd) + self.assertNotIn("--minio-access-key", cmd) + self.assertNotIn("--minio-secret-key", cmd) + self.assertNotIn("--minio-bucket", cmd) + + # With all minio flags + config_with_minio = StageToMinioConfig( + input_dir=Path("/tmp/inputs"), + prefix="p/", + minio_endpoint="https://storage.example.com", + 
minio_access_key="AKID", + minio_secret_key="SECRET", + minio_bucket="my-bucket", + ) + cli.stage_to_minio(config_with_minio) + cmd = mock_run.call_args[0][0] + self.assertIn("--minio-endpoint", cmd) + self.assertIn("--minio-bucket", cmd) + ep_idx = cmd.index("--minio-endpoint") + self.assertEqual(cmd[ep_idx + 1], "https://storage.example.com") + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_extra_flags(self, mock_run, _mock_jar): + """config_file, ensure_bucket_exists, minio_path only emitted when set.""" + from joshpy.cli import StageToMinioConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + cli = JoshCLI(josh_jar=self.JAR_MODE) + + # Defaults: none of the extras + cli.stage_to_minio( + StageToMinioConfig(input_dir=Path("/tmp/inputs"), prefix="p/"), + ) + cmd = mock_run.call_args[0][0] + self.assertFalse(any(c.startswith("--config-file=") for c in cmd)) + self.assertNotIn("--ensure-bucket-exists", cmd) + self.assertFalse(any(c.startswith("--minio-path=") for c in cmd)) + + # All extras set + cli.stage_to_minio( + StageToMinioConfig( + input_dir=Path("/tmp/inputs"), + prefix="p/", + config_file=Path("/etc/josh.json"), + ensure_bucket_exists=True, + minio_path="models/v1/", + ), + ) + cmd = mock_run.call_args[0][0] + self.assertTrue(any(c.startswith("--config-file=") for c in cmd)) + self.assertIn("--ensure-bucket-exists", cmd) + self.assertIn("--minio-path=models/v1/", cmd) + + +class TestStageToMinioConfig(unittest.TestCase): + """Tests for StageToMinioConfig dataclass.""" + + def test_basic_creation(self): + from joshpy.cli import StageToMinioConfig + + config = StageToMinioConfig( + input_dir=Path("/tmp/inputs"), prefix="batch-jobs/abc/" + ) + self.assertEqual(config.input_dir, Path("/tmp/inputs")) + self.assertEqual(config.prefix, "batch-jobs/abc/") + self.assertIsNone(config.minio_endpoint) + + def test_frozen(self): + from joshpy.cli import 
StageToMinioConfig + + config = StageToMinioConfig( + input_dir=Path("/tmp/inputs"), prefix="p/" + ) + with self.assertRaises(AttributeError): + config.prefix = "other/" + + +class TestBatchRemote(unittest.TestCase): + """Tests for JoshCLI.batch_remote() (post-josh#423 flag-based CLI).""" + + JAR_MODE = JarMode.LOCAL + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_basic_args(self, mock_run, _mock_jar): + """batch_remote() should build correct CLI args.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + ) + result = cli.batch_remote(config) + + self.assertTrue(result.success) + cmd = mock_run.call_args[0][0] + self.assertIn("batchRemote", cmd) + self.assertIn("Main", cmd) + self.assertIn("--target=gke-test", cmd) + self.assertIn("--minio-prefix=sweeps/test/", cmd) + # No positional script arg anymore; simulation is trailing positional + self.assertFalse(any("sim.josh" in c for c in cmd)) + # Simulation name is the trailing positional (after all flags) + self.assertEqual(cmd[-1], "Main") + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_replicates(self, mock_run, _mock_jar): + """batch_remote() should include --replicates when > 1.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + replicates=5, + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("--replicates=5", cmd) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + 
@patch("subprocess.run") + def test_replicates_default_omitted(self, mock_run, _mock_jar): + """batch_remote() should omit --replicates when == 1.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertFalse(any("--replicates" in c for c in cmd)) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_no_wait(self, mock_run, _mock_jar): + """batch_remote() should include --no-wait flag.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + no_wait=True, + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("--no-wait", cmd) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_no_wait_default_omitted(self, mock_run, _mock_jar): + """batch_remote() should omit --no-wait when False.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertNotIn("--no-wait", cmd) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_poll_interval_and_timeout(self, mock_run, _mock_jar): + """batch_remote() should include --poll-interval and --timeout.""" + from joshpy.cli 
import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + poll_interval=30, + timeout=600, + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("--poll-interval=30", cmd) + self.assertIn("--timeout=600", cmd) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_stage_from_local_dir(self, mock_run, _mock_jar): + """batch_remote() should include --stage-from-local-dir when set.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + stage_from_local_dir=Path("/tmp/inputs"), + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + # Resolved path is absolute; just check prefix + suffix + stage_args = [c for c in cmd if c.startswith("--stage-from-local-dir=")] + self.assertEqual(len(stage_args), 1) + self.assertTrue(stage_args[0].endswith("/inputs")) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_require_prestaged(self, mock_run, _mock_jar): + """batch_remote() should include --require-prestaged when set.""" + from joshpy.cli import BatchRemoteConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + require_prestaged=True, + ) + cli.batch_remote(config) + + cmd = mock_run.call_args[0][0] + self.assertIn("--require-prestaged", cmd) + + +class TestBatchRemoteConfig(unittest.TestCase): + """Tests for BatchRemoteConfig 
dataclass (post-josh#423).""" + + def test_basic_creation(self): + from joshpy.cli import BatchRemoteConfig + + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + ) + self.assertEqual(config.simulation, "Main") + self.assertEqual(config.target, "gke-test") + self.assertEqual(config.minio_prefix, "sweeps/test/") + self.assertEqual(config.replicates, 1) + self.assertFalse(config.no_wait) + self.assertIsNone(config.poll_interval) + self.assertIsNone(config.timeout) + self.assertIsNone(config.stage_from_local_dir) + self.assertFalse(config.require_prestaged) + + def test_frozen(self): + from joshpy.cli import BatchRemoteConfig + + config = BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + ) + with self.assertRaises(AttributeError): + config.target = "other" + + def test_mutex_stage_and_prestaged(self): + """stage_from_local_dir and require_prestaged are mutually exclusive.""" + from joshpy.cli import BatchRemoteConfig + + with self.assertRaises(ValueError) as ctx: + BatchRemoteConfig( + simulation="Main", + target="gke-test", + minio_prefix="sweeps/test/", + stage_from_local_dir=Path("/tmp/x"), + require_prestaged=True, + ) + self.assertIn("mutually exclusive", str(ctx.exception)) + + +class TestPreprocessBatch(unittest.TestCase): + """Tests for JoshCLI.preprocess_batch().""" + + JAR_MODE = JarMode.LOCAL + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_basic_args(self, mock_run, _mock_jar): + """preprocess_batch() should build correct CLI args.""" + from joshpy.cli import PreprocessBatchConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = PreprocessBatchConfig( + script=Path("/path/to/sim.josh"), + simulation="Main", + data_file=Path("/path/to/data.nc"), + variable="temperature", + units="K", + 
output=Path("/path/to/output.jshd"), + target="gke-test", + ) + result = cli.preprocess_batch(config) + + self.assertTrue(result.success) + cmd = mock_run.call_args[0][0] + self.assertIn("preprocessBatch", cmd) + self.assertTrue(any("sim.josh" in c for c in cmd)) + self.assertIn("Main", cmd) + self.assertTrue(any("data.nc" in c for c in cmd)) + self.assertIn("temperature", cmd) + self.assertIn("K", cmd) + self.assertTrue(any("output.jshd" in c for c in cmd)) + self.assertIn("--target=gke-test", cmd) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_positional_arg_order(self, mock_run, _mock_jar): + """preprocessBatch positional args must be in correct order.""" + from joshpy.cli import PreprocessBatchConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = PreprocessBatchConfig( + script=Path("/path/to/sim.josh"), + simulation="Main", + data_file=Path("/path/to/data.nc"), + variable="temp", + units="K", + output=Path("/path/to/out.jshd"), + target="gke-test", + ) + cli.preprocess_batch(config) + + cmd = mock_run.call_args[0][0] + # Find indices of positional args after "preprocessBatch" + pb_idx = cmd.index("preprocessBatch") + self.assertIn("Main", cmd[pb_idx + 2:pb_idx + 3]) + self.assertIn("temp", cmd) + self.assertIn("K", cmd) + # --target should come after positional args + target_idx = cmd.index("--target=gke-test") + self.assertGreater(target_idx, pb_idx + 6) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_optional_flags(self, mock_run, _mock_jar): + """preprocess_batch() should include optional flags only when set.""" + from joshpy.cli import PreprocessBatchConfig + + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + cli = JoshCLI(josh_jar=self.JAR_MODE) + + # Defaults: no optional flags + 
cli.preprocess_batch( + PreprocessBatchConfig( + script=Path("/s.josh"), simulation="M", + data_file=Path("/d.nc"), variable="v", units="u", + output=Path("/o.jshd"), target="t", + ), + ) + cmd = mock_run.call_args[0][0] + for flag in ("--crs=", "--x-coord=", "--y-coord=", "--time-dim=", + "--timestep=", "--default-value=", "--parallel", "--amend"): + self.assertFalse( + any(c == flag or c.startswith(flag) for c in cmd), + f"unexpected {flag} in defaults", + ) + + # All set + cli.preprocess_batch( + PreprocessBatchConfig( + script=Path("/s.josh"), simulation="M", + data_file=Path("/d.nc"), variable="v", units="u", + output=Path("/o.jshd"), target="t", + crs="EPSG:4326", + x_coord="lon", y_coord="lat", time_dim="time", + timestep=0, default_value=-999.0, + parallel=True, amend=True, + ), + ) + cmd = mock_run.call_args[0][0] + self.assertIn("--crs=EPSG:4326", cmd) + self.assertIn("--x-coord=lon", cmd) + self.assertIn("--y-coord=lat", cmd) + self.assertIn("--time-dim=time", cmd) + self.assertIn("--timestep=0", cmd) + self.assertIn("--default-value=-999.0", cmd) + self.assertIn("--parallel", cmd) + self.assertIn("--amend", cmd) + + +class TestPollBatch(unittest.TestCase): + """Tests for JoshCLI.poll_batch().""" + + JAR_MODE = JarMode.LOCAL + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_basic_args(self, mock_run, _mock_jar): + """poll_batch() should build correct CLI args.""" + from joshpy.cli import PollBatchConfig + + mock_run.return_value = MagicMock(returncode=0, stdout='{"status":"complete"}', stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + config = PollBatchConfig(job_id="abc-123", target="gke-test") + result = cli.poll_batch(config) + + self.assertTrue(result.success) + cmd = mock_run.call_args[0][0] + self.assertIn("pollBatch", cmd) + self.assertIn("abc-123", cmd) + self.assertIn("--target=gke-test", cmd) + + @patch("joshpy.jar.JarManager.get_jar", 
return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_exit_code_2_running(self, mock_run, _mock_jar): + """poll_batch() exit code 2 means job is still running.""" + from joshpy.cli import PollBatchConfig + + mock_run.return_value = MagicMock(returncode=2, stdout="", stderr="") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + result = cli.poll_batch(PollBatchConfig(job_id="abc", target="t")) + self.assertFalse(result.success) + self.assertEqual(result.exit_code, 2) + + @patch("joshpy.jar.JarManager.get_jar", return_value=Path("/fake/joshsim-fat.jar")) + @patch("subprocess.run") + def test_exit_code_100_poll_failure(self, mock_run, _mock_jar): + """poll_batch() exit code 100 means transient poll failure.""" + from joshpy.cli import PollBatchConfig + + mock_run.return_value = MagicMock(returncode=100, stdout="", stderr="poll failed") + + cli = JoshCLI(josh_jar=self.JAR_MODE) + result = cli.poll_batch(PollBatchConfig(job_id="abc", target="t")) + self.assertEqual(result.exit_code, 100) + + +class TestPollBatchConfig(unittest.TestCase): + """Tests for PollBatchConfig dataclass.""" + + def test_basic_creation(self): + from joshpy.cli import PollBatchConfig + + config = PollBatchConfig(job_id="abc-123", target="gke-test") + self.assertEqual(config.job_id, "abc-123") + self.assertEqual(config.target, "gke-test") + + def test_frozen(self): + from joshpy.cli import PollBatchConfig + + config = PollBatchConfig(job_id="abc", target="t") + with self.assertRaises(AttributeError): + config.job_id = "other" + + +class TestPreprocessBatchConfig(unittest.TestCase): + """Tests for PreprocessBatchConfig dataclass.""" + + def test_basic_creation(self): + from joshpy.cli import PreprocessBatchConfig + + config = PreprocessBatchConfig( + script=Path("/path/to/sim.josh"), + simulation="Main", + data_file=Path("/path/to/data.nc"), + variable="temp", + units="K", + output=Path("/path/to/out.jshd"), + target="gke-test", + ) + self.assertEqual(config.variable, "temp") + 
self.assertEqual(config.target, "gke-test") + + def test_frozen(self): + from joshpy.cli import PreprocessBatchConfig + + config = PreprocessBatchConfig( + script=Path("/path/to/sim.josh"), + simulation="Main", + data_file=Path("/path/to/data.nc"), + variable="temp", + units="K", + output=Path("/path/to/out.jshd"), + target="gke-test", + ) + with self.assertRaises(AttributeError): + config.target = "other" + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_diff.py b/tests/test_diff.py index 967dfa3..8a2cd82 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -319,7 +319,8 @@ def test_main_view(self): file_registry.label_run("h1", "run_a") file_registry.close() - with patch("sys.argv", ["prog", str(db_path), "--view", "run_a"]): + with patch("sys.argv", ["prog", str(db_path), "--view", "run_a"]), \ + patch("joshpy.inspect._core._launch_ide"): result = main() self.assertEqual(result, 0) diff --git a/tests/test_jobs.py b/tests/test_jobs.py index f859050..9758d93 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -3,6 +3,7 @@ import tempfile import unittest from pathlib import Path +from unittest.mock import MagicMock try: import duckdb # noqa: F401 @@ -2360,5 +2361,239 @@ def test_empty_directory(self): self.assertEqual(result, {}) +class TestToBatchRemoteConfig(unittest.TestCase): + """Tests for to_batch_remote_config() (post-josh#423).""" + + def test_basic_conversion(self): + from joshpy.jobs import to_batch_remote_config + + job = ExpandedJob( + config_content="x = 1 count", + config_path=Path("/tmp/config.jshc"), + config_name="config.jshc", + run_hash="abc123", + parameters={"x": 1}, + simulation="Main", + replicates=3, + source_path=Path("/path/to/sim.josh"), + ) + + config = to_batch_remote_config(job, "gke-test", "sweeps/s1/jobs/abc123/") + + self.assertEqual(config.simulation, "Main") + self.assertEqual(config.target, "gke-test") + self.assertEqual(config.minio_prefix, "sweeps/s1/jobs/abc123/") + 
self.assertEqual(config.replicates, 3) + self.assertFalse(config.no_wait) + # require_prestaged defaults to True (safe default for sweeps) + self.assertTrue(config.require_prestaged) + # No stage_from_local_dir on the sweep path + self.assertIsNone(config.stage_from_local_dir) + # BatchRemoteConfig should no longer carry custom_tags (removed) + self.assertFalse(hasattr(config, "custom_tags")) + + def test_no_wait_mode(self): + from joshpy.jobs import to_batch_remote_config + + job = ExpandedJob( + config_content="x = 1 count", + config_path=Path("/tmp/config.jshc"), + config_name="config.jshc", + run_hash="abc123", + parameters={"x": 1}, + simulation="Main", + replicates=1, + source_path=Path("/path/to/sim.josh"), + ) + + config = to_batch_remote_config( + job, "gke-test", "sweeps/s1/jobs/abc123/", + no_wait=True, timeout=600, + ) + + self.assertTrue(config.no_wait) + self.assertEqual(config.timeout, 600) + + def test_source_path_required(self): + from joshpy.jobs import to_batch_remote_config + + job = ExpandedJob( + config_content="x = 1 count", + config_path=Path("/tmp/config.jshc"), + config_name="config.jshc", + run_hash="abc123", + parameters={"x": 1}, + simulation="Main", + replicates=1, + source_path=None, + ) + + with self.assertRaises(ValueError, msg="source_path is required"): + to_batch_remote_config(job, "gke-test", "sweeps/s1/jobs/abc123/") + + +class TestRunSweepBatchRemote(unittest.TestCase): + """Tests for run_sweep() batch_remote mode (post-josh#423 stage + dispatch).""" + + def _make_real_job(self, tmp: Path, run_hash: str = "abc123") -> ExpandedJob: + """Build an ExpandedJob backed by a real on-disk .josh file.""" + src = tmp / "sim.josh" + src.write_text("start simulation Main\nend simulation\n") + return ExpandedJob( + config_content="x = 1 count", + config_path=tmp / "config.jshc", + config_name="config.jshc", + run_hash=run_hash, + parameters={"x": 1}, + simulation="Main", + replicates=1, + source_path=src, + ) + + def 
test_batch_remote_requires_target(self): + from joshpy.jobs import run_sweep + + with self.assertRaises(ValueError, msg="target is required"): + run_sweep( + MagicMock(), + MagicMock(total_jobs=1, total_replicates=1, __iter__=lambda s: iter([])), + batch_remote=True, + target=None, + ) + + def test_batch_remote_exclusive_with_remote(self): + from joshpy.jobs import run_sweep + + with self.assertRaises(ValueError, msg="mutually exclusive"): + run_sweep( + MagicMock(), + MagicMock(total_jobs=1, total_replicates=1, __iter__=lambda s: iter([])), + batch_remote=True, + remote=True, + target="gke-test", + ) + + def test_stage_then_batch_remote_with_require_prestaged(self): + """Blocking batch_remote path stages first, then dispatches with + require_prestaged=True.""" + from joshpy.jobs import run_sweep + + mock_cli = MagicMock() + mock_cli.stage_to_minio.return_value = MagicMock( + success=True, exit_code=0, stdout="", stderr="", + ) + mock_cli.batch_remote.return_value = MagicMock( + success=True, exit_code=0, stdout="", stderr="", + ) + + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + job = self._make_real_job(tmp) + job_set = MagicMock( + total_jobs=1, total_replicates=1, + __iter__=lambda s: iter([job]), + ) + + result = run_sweep( + mock_cli, job_set, + batch_remote=True, + target="gke-test", + session_id="s1", + quiet=True, + ) + + # stage happened before dispatch + mock_cli.stage_to_minio.assert_called_once() + mock_cli.batch_remote.assert_called_once() + + # dispatch config uses the same prefix staged to, with require_prestaged=True + stage_cfg = mock_cli.stage_to_minio.call_args[0][0] + dispatch_cfg = mock_cli.batch_remote.call_args[0][0] + self.assertEqual(stage_cfg.prefix, dispatch_cfg.minio_prefix) + self.assertTrue(dispatch_cfg.require_prestaged) + # Per-job prefix is keyed on run_hash + self.assertIn(job.run_hash, dispatch_cfg.minio_prefix) + self.assertIn("sweeps/s1/", dispatch_cfg.minio_prefix) + + 
self.assertEqual(result.succeeded, 1) + self.assertEqual(result.failed, 0) + + def test_stage_failure_short_circuits_dispatch(self): + """If stage_to_minio fails, batch_remote should not be called.""" + from joshpy.jobs import run_sweep + + mock_cli = MagicMock() + mock_cli.stage_to_minio.return_value = MagicMock( + success=False, exit_code=1, stdout="", stderr="bucket denied", + ) + + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + job = self._make_real_job(tmp) + job_set = MagicMock( + total_jobs=1, total_replicates=1, + __iter__=lambda s: iter([job]), + ) + + result = run_sweep( + mock_cli, job_set, + batch_remote=True, + target="gke-test", + session_id="s1", + quiet=True, + stop_on_failure=False, + ) + + mock_cli.stage_to_minio.assert_called_once() + mock_cli.batch_remote.assert_not_called() + self.assertEqual(result.failed, 1) + + def test_async_dispatch_parses_json(self): + """Async batch_remote should still stage first, then parse --no-wait JSON.""" + import json + from joshpy.jobs import run_sweep + + dispatch_json = json.dumps({ + "jobId": "test-job-123", + "target": "gke-test", + "statusPath": "batch-status/test-job-123/status.json", + }) + + mock_cli = MagicMock() + mock_cli.stage_to_minio.return_value = MagicMock( + success=True, exit_code=0, stdout="", stderr="", + ) + mock_cli.batch_remote.return_value = MagicMock( + success=True, exit_code=0, stdout=dispatch_json, stderr="", + ) + mock_cli.poll_batch.return_value = MagicMock( + success=True, exit_code=0, + stdout='{"status":"complete"}', stderr="", + ) + + with tempfile.TemporaryDirectory() as tmp_str: + tmp = Path(tmp_str) + job = self._make_real_job(tmp) + job_set = MagicMock( + total_jobs=1, total_replicates=1, + __iter__=lambda s: iter([job]), + ) + + result = run_sweep( + mock_cli, job_set, + batch_remote=True, + target="gke-test", + batch_no_wait=True, + session_id="s1", + quiet=True, + ) + + mock_cli.stage_to_minio.assert_called_once() + 
mock_cli.batch_remote.assert_called_once() + mock_cli.poll_batch.assert_called() + self.assertEqual(result.succeeded, 1) + self.assertEqual(result.failed, 0) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_minio_integration.py b/tests/test_minio_integration.py new file mode 100644 index 0000000..6611337 --- /dev/null +++ b/tests/test_minio_integration.py @@ -0,0 +1,691 @@ +"""MinIO integration tests for joshpy. + +Escalating levels of integration testing against a real MinIO service: + +- Level 1: DuckDB writes CSV to MinIO and reads it back +- Level 2: Josh JAR runs a simulation that exports to MinIO, Python reads it +- Level 3: CellDataLoader.load_csv() ingests JAR output from S3 into registry +- Level 4: End-to-end ingest_results() from MinIO by label +- Level 5: Partial/interrupted sweep recovery from MinIO +- Edge cases: bad creds, missing bucket, namespace isolation + +Requires: + - MinIO running at localhost:9000 (bitnamilegacy/minio with josh-test-bucket:public) + - Josh JAR downloaded (pixi run get-jars) + +Run with: pixi run -e dev test-integration +""" + +from __future__ import annotations + +import os +import uuid +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from tests.conftest import ( + MINIO_ACCESS_KEY, + MINIO_ENDPOINT, + MINIO_SECRET_KEY, + TEST_BUCKET, +) + +# All tests in this file require MinIO +pytestmark = pytest.mark.integration + + +# --------------------------------------------------------------------------- +# Test CSV data +# --------------------------------------------------------------------------- + +SIMPLE_CSV = "step,replicate,position.x,position.y,treeCount,averageHeight\n" + + +def _make_csv(replicate: int = 0, steps: int = 5, n_patches: int = 1) -> str: + """Generate a CSV matching Josh export format.""" + lines = [SIMPLE_CSV.rstrip("\n")] + for step in range(steps): + for _ in range(n_patches): + lines.append( + f"{step},{replicate},0.0,0.0,{10 + step},{5.0 + step * 
0.5}" + ) + return "\n".join(lines) + "\n" + + +# =================================================================== +# Level 1: DuckDB httpfs writes to and reads from MinIO +# =================================================================== + + +class TestMinioWrite: + """Level 1: Prove DuckDB httpfs can write CSV to MinIO.""" + + def test_duckdb_copy_csv_to_s3(self, minio_conn, test_bucket): + """COPY ... TO 's3://...' should succeed without error.""" + key = f"test-level1/{uuid.uuid4().hex[:8]}/write.csv" + s3_url = f"s3://{test_bucket}/{key}" + + minio_conn.execute( + f"COPY (SELECT 1 as step, 0 as replicate, 42.0 as val) " + f"TO '{s3_url}' (FORMAT CSV, HEADER)" + ) + + # Verify by reading back + result = minio_conn.execute( + f"SELECT * FROM read_csv_auto('{s3_url}')" + ).fetchall() + assert len(result) == 1 + assert result[0] == (1, 0, 42.0) + + def test_write_then_read_roundtrip(self, seed_csv): + """seed_csv fixture writes CSV, read it back via DuckDB.""" + csv_data = "a,b,c\n1,hello,3.14\n2,world,2.72\n" + key = f"test-level1/{uuid.uuid4().hex[:8]}/roundtrip.csv" + s3_url = seed_csv(key, csv_data) + + import duckdb + from joshpy.registry import configure_s3 + + conn = duckdb.connect(":memory:") + configure_s3( + conn, + endpoint=MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + use_ssl=False, + ) + rows = conn.execute( + f"SELECT * FROM read_csv_auto('{s3_url}')" + ).fetchall() + conn.close() + + assert len(rows) == 2 + assert rows[0][1] == "hello" + assert rows[1][1] == "world" + + +# =================================================================== +# Level 2: Josh JAR writes to MinIO, Python reads +# =================================================================== + + +class TestMinioJarWrite: + """Level 2: Run a real simulation that exports to MinIO, verify from Python.""" + + SCRIPT = Path(__file__).parent / "fixtures" / "minio_export.josh" + + @pytest.fixture(autouse=True, scope="class") + def 
_run_simulation(self, request, josh_cli, minio_available, jar_available): + """Run the test simulation once for the whole class.""" + env_backup = {} + for k, v in { + "MINIO_ENDPOINT": f"http://{MINIO_ENDPOINT}", + "MINIO_ACCESS_KEY": MINIO_ACCESS_KEY, + "MINIO_SECRET_KEY": MINIO_SECRET_KEY, + }.items(): + env_backup[k] = os.environ.get(k) + os.environ[k] = v + + from joshpy.cli import RunConfig + + result = josh_cli.run( + RunConfig( + script=self.SCRIPT, + simulation="Main", + replicates=2, + seed=42, + ) + ) + + # Store result on the class for tests to inspect + request.cls.jar_result = result + + yield + + # Restore env + for k, orig in env_backup.items(): + if orig is None: + os.environ.pop(k, None) + else: + os.environ[k] = orig + + def test_jar_run_succeeds(self): + """The Josh JAR should complete the simulation without error.""" + assert self.jar_result.success, ( + f"JAR failed (exit {self.jar_result.exit_code}): " + f"{self.jar_result.stderr}" + ) + + def test_jar_inspect_exports_minio(self, josh_cli): + """inspect_exports should parse the minio:// export path.""" + from joshpy.cli import InspectExportsConfig + + exports = josh_cli.inspect_exports( + InspectExportsConfig(script=self.SCRIPT, simulation="Main") + ) + patch_info = exports.export_files["patch"] + assert patch_info is not None + assert patch_info.protocol == "minio" + assert patch_info.host == TEST_BUCKET + assert "{replicate}" in patch_info.path + + def test_jar_output_readable_from_s3(self, minio_conn): + """CSV written by the JAR should be readable via DuckDB S3.""" + s3_url = f"s3://{TEST_BUCKET}/results/output_0.csv" + rows = minio_conn.execute( + f"SELECT * FROM read_csv_auto('{s3_url}')" + ).fetchall() + + assert len(rows) > 0 + + # Check expected columns exist + cols = [ + desc[0] + for desc in minio_conn.execute( + f"SELECT * FROM read_csv_auto('{s3_url}') LIMIT 0" + ).description + ] + assert "step" in cols + assert "replicate" in cols + assert "treeCount" in cols + assert 
"averageHeight" in cols + + +# =================================================================== +# Level 3: CellDataLoader loads JAR output from S3 +# =================================================================== + + +class TestMinioCellDataLoader: + """Level 3: CellDataLoader.load_csv with s3:// URL.""" + + def _setup_registry_for_load(self, registry): + """Register a minimal run so load_csv has a valid run_id.""" + from joshpy.jobs import JobConfig + + config = JobConfig( + source_path=Path("/tmp/sim.josh"), + simulation="Main", + replicates=1, + ) + session_id = registry.create_session( + config=config, experiment_name="test" + ) + registry.register_run( + session_id=session_id, + run_hash="load_test_hash", + josh_path="/tmp/sim.josh", + config_content="test", + file_mappings=None, + parameters={}, + ) + run_id = registry.start_run("load_test_hash", session_id=session_id) + registry.complete_run(run_id, exit_code=0) + return run_id + + def test_load_csv_from_s3_url(self, minio_registry, seed_csv, test_bucket): + """load_csv with an s3:// URL should insert rows into cell_data.""" + from joshpy.cell_data import CellDataLoader + + run_id = self._setup_registry_for_load(minio_registry) + csv_data = _make_csv(replicate=0, steps=3) + key = f"test-level3/{uuid.uuid4().hex[:8]}/export.csv" + s3_url = seed_csv(key, csv_data) + + loader = CellDataLoader(minio_registry) + rows = loader.load_csv( + csv_path=s3_url, + run_id=run_id, + run_hash="load_test_hash", + ) + + assert rows == 3 + + # Verify data in registry + result = minio_registry.conn.execute( + "SELECT step, replicate, \"treeCount\", \"averageHeight\" " + "FROM cell_data ORDER BY step" + ).fetchall() + assert len(result) == 3 + assert result[0][0] == 0 # step + assert result[0][1] == 0 # replicate + assert result[0][2] == 10 # treeCount at step 0 + + def test_load_csv_creates_variable_columns( + self, minio_registry, seed_csv, test_bucket + ): + """Variable columns from the S3 CSV should be 
auto-created.""" + from joshpy.cell_data import CellDataLoader + + run_id = self._setup_registry_for_load(minio_registry) + csv_data = _make_csv(replicate=0, steps=2) + key = f"test-level3/{uuid.uuid4().hex[:8]}/vars.csv" + s3_url = seed_csv(key, csv_data) + + CellDataLoader(minio_registry).load_csv( + csv_path=s3_url, run_id=run_id, run_hash="load_test_hash" + ) + + var_cols = minio_registry.list_variable_columns() + assert "treeCount" in var_cols + assert "averageHeight" in var_cols + + def test_load_csv_s3_nonexistent_key(self, minio_registry): + """Missing S3 object should raise a recognizable error.""" + from joshpy.cell_data import CellDataLoader + + run_id = self._setup_registry_for_load(minio_registry) + loader = CellDataLoader(minio_registry) + + with pytest.raises(Exception, match="HTTP|404|NoSuchKey|IOException"): + loader.load_csv( + csv_path=f"s3://{TEST_BUCKET}/nonexistent/{uuid.uuid4()}.csv", + run_id=run_id, + run_hash="load_test_hash", + ) + + def test_load_csv_s3_missing_required_columns( + self, minio_registry, seed_csv + ): + """CSV without step/replicate should raise ValueError even from S3.""" + from joshpy.cell_data import CellDataLoader + + run_id = self._setup_registry_for_load(minio_registry) + bad_csv = "a,b,c\n1,2,3\n" + key = f"test-level3/{uuid.uuid4().hex[:8]}/bad.csv" + s3_url = seed_csv(key, bad_csv) + + loader = CellDataLoader(minio_registry) + with pytest.raises(ValueError, match="step.*replicate"): + loader.load_csv( + csv_path=s3_url, + run_id=run_id, + run_hash="load_test_hash", + ) + + +# =================================================================== +# Level 4: End-to-end ingest_results() from MinIO +# =================================================================== + + +def _make_ingest_registry(minio_registry, josh_content, replicates=2): + """Set up registry metadata for ingest_results() tests. + + Creates session, registers run with josh_content, labels it, + and creates completed job_runs. 
Returns (run_hash, run_id). + """ + from joshpy.jobs import JobConfig + + run_hash = f"ingest_{uuid.uuid4().hex[:8]}" + + config = JobConfig( + source_path=Path("/tmp/sim.josh"), + simulation="Main", + replicates=replicates, + ) + session_id = minio_registry.create_session( + config=config, experiment_name="ingest-test" + ) + minio_registry.register_run( + session_id=session_id, + run_hash=run_hash, + josh_path="/tmp/sim.josh", + config_content="test", + file_mappings=None, + parameters={}, + josh_content=josh_content, + ) + minio_registry.label_run(run_hash, f"label-{run_hash}") + + run_id = None + for _ in range(replicates): + run_id = minio_registry.start_run(run_hash, session_id=session_id) + minio_registry.complete_run(run_id, exit_code=0) + + return run_hash, run_id + + +class TestMinioIngestResults: + """Level 4: Full ingest_results() reading real CSVs from MinIO.""" + + JOSH_CONTENT = (Path(__file__).parent / "fixtures" / "minio_export.josh").read_text() + + def test_ingest_all_replicates( + self, + minio_registry, + seed_csv, + test_bucket, + patch_s3_no_ssl, + monkeypatch, + ): + """ingest_results() should load all replicates from S3.""" + from joshpy.cli import ExportFileInfo, ExportPaths + from joshpy.sweep import ingest_results + + run_hash, _ = _make_ingest_registry( + minio_registry, self.JOSH_CONTENT, replicates=3 + ) + label = f"label-{run_hash}" + + # Seed 3 replicate CSVs + prefix = f"test-level4/{run_hash}" + for rep in range(3): + csv_data = _make_csv(replicate=rep, steps=4) + seed_csv(f"{prefix}/output_{rep}.csv", csv_data) + + # Mock CLI — only inspect_exports needs the JAR + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw=f"minio://{test_bucket}/{prefix}/output_{{replicate}}.csv", + protocol="minio", + host=test_bucket, + path=f"/{prefix}/output_{{replicate}}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={ 
+ "organism": None, + "patch": None, + "agent": None, + "disturbance": None, + }, + ) + + monkeypatch.setenv("MINIO_ENDPOINT", MINIO_ENDPOINT) + monkeypatch.setenv("MINIO_ACCESS_KEY", MINIO_ACCESS_KEY) + monkeypatch.setenv("MINIO_SECRET_KEY", MINIO_SECRET_KEY) + + rows = ingest_results(mock_cli, minio_registry, label, quiet=True) + + # 3 replicates x 4 steps x 1 patch = 12 rows + assert rows == 12 + + # Verify data is queryable + result = minio_registry.conn.execute( + "SELECT DISTINCT replicate FROM cell_data ORDER BY replicate" + ).fetchall() + assert [r[0] for r in result] == [0, 1, 2] + + def test_ingest_results_queryable( + self, + minio_registry, + seed_csv, + test_bucket, + patch_s3_no_ssl, + monkeypatch, + ): + """After ingest, cell_data should be queryable with aggregates.""" + from joshpy.cli import ExportFileInfo, ExportPaths + from joshpy.sweep import ingest_results + + run_hash, _ = _make_ingest_registry( + minio_registry, self.JOSH_CONTENT, replicates=2 + ) + label = f"label-{run_hash}" + + prefix = f"test-level4-query/{run_hash}" + for rep in range(2): + seed_csv(f"{prefix}/output_{rep}.csv", _make_csv(replicate=rep, steps=5)) + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw=f"minio://{test_bucket}/{prefix}/output_{{replicate}}.csv", + protocol="minio", + host=test_bucket, + path=f"/{prefix}/output_{{replicate}}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={ + "organism": None, + "patch": None, + "agent": None, + "disturbance": None, + }, + ) + + monkeypatch.setenv("MINIO_ENDPOINT", MINIO_ENDPOINT) + monkeypatch.setenv("MINIO_ACCESS_KEY", MINIO_ACCESS_KEY) + monkeypatch.setenv("MINIO_SECRET_KEY", MINIO_SECRET_KEY) + + ingest_results(mock_cli, minio_registry, label, quiet=True) + + # Aggregate query + avg = minio_registry.conn.execute( + 'SELECT AVG("treeCount") FROM cell_data WHERE run_hash = ?', + [run_hash], 
+ ).fetchone()[0] + assert avg is not None + assert avg > 0 + + +# =================================================================== +# Level 5: Partial / interrupted sweep recovery +# =================================================================== + + +class TestMinioPartialRecovery: + """Level 5: Graceful recovery when some replicates are missing.""" + + JOSH_CONTENT = (Path(__file__).parent / "fixtures" / "minio_export.josh").read_text() + + def _run_ingest( + self, + minio_registry, + seed_csv, + test_bucket, + monkeypatch, + *, + replicates_registered: int, + replicates_seeded: list[int], + steps: int = 3, + ) -> tuple[int, str]: + """Helper: set up registry, seed some replicates, call ingest_results.""" + from joshpy.cli import ExportFileInfo, ExportPaths + from joshpy.sweep import ingest_results + + run_hash, _ = _make_ingest_registry( + minio_registry, self.JOSH_CONTENT, replicates=replicates_registered + ) + label = f"label-{run_hash}" + + prefix = f"test-level5/{run_hash}" + for rep in replicates_seeded: + seed_csv( + f"{prefix}/output_{rep}.csv", + _make_csv(replicate=rep, steps=steps), + ) + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw=f"minio://{test_bucket}/{prefix}/output_{{replicate}}.csv", + protocol="minio", + host=test_bucket, + path=f"/{prefix}/output_{{replicate}}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={ + "organism": None, + "patch": None, + "agent": None, + "disturbance": None, + }, + ) + + monkeypatch.setenv("MINIO_ENDPOINT", MINIO_ENDPOINT) + monkeypatch.setenv("MINIO_ACCESS_KEY", MINIO_ACCESS_KEY) + monkeypatch.setenv("MINIO_SECRET_KEY", MINIO_SECRET_KEY) + + rows = ingest_results(mock_cli, minio_registry, label, quiet=True) + return rows, run_hash + + def test_partial_replicates_graceful( + self, minio_registry, seed_csv, test_bucket, patch_s3_no_ssl, monkeypatch + ): + """Only 2 of 3 
replicates exist — should load 2, skip 1, no error.""" + rows, run_hash = self._run_ingest( + minio_registry, + seed_csv, + test_bucket, + monkeypatch, + replicates_registered=3, + replicates_seeded=[0, 2], # replicate 1 missing + steps=4, + ) + + # 2 replicates x 4 steps = 8 rows + assert rows == 8 + + # Verify only replicates 0 and 2 present + reps = minio_registry.conn.execute( + "SELECT DISTINCT replicate FROM cell_data " + "WHERE run_hash = ? ORDER BY replicate", + [run_hash], + ).fetchall() + assert [r[0] for r in reps] == [0, 2] + + def test_zero_replicates_available( + self, minio_registry, seed_csv, test_bucket, patch_s3_no_ssl, monkeypatch + ): + """No CSVs in MinIO — should return 0 rows, no exception.""" + rows, _ = self._run_ingest( + minio_registry, + seed_csv, + test_bucket, + monkeypatch, + replicates_registered=3, + replicates_seeded=[], # nothing written + ) + assert rows == 0 + + def test_single_replicate_of_many( + self, minio_registry, seed_csv, test_bucket, patch_s3_no_ssl, monkeypatch + ): + """1 of 10 replicates available — should load only that one.""" + rows, run_hash = self._run_ingest( + minio_registry, + seed_csv, + test_bucket, + monkeypatch, + replicates_registered=10, + replicates_seeded=[7], + steps=3, + ) + assert rows == 3 + + reps = minio_registry.conn.execute( + "SELECT DISTINCT replicate FROM cell_data WHERE run_hash = ?", + [run_hash], + ).fetchall() + assert [r[0] for r in reps] == [7] + + +# =================================================================== +# Edge cases +# =================================================================== + + +class TestMinioEdgeCases: + """Edge cases: bad credentials, missing bucket, namespace isolation.""" + + def test_bad_credentials_clear_error(self, minio_available, test_bucket): + """Wrong credentials should produce an actionable error.""" + import duckdb + from joshpy.registry import configure_s3 + + conn = duckdb.connect(":memory:") + configure_s3( + conn, + 
endpoint=MINIO_ENDPOINT, + access_key="WRONG_KEY", + secret_key="WRONG_SECRET", + use_ssl=False, + ) + + with pytest.raises(Exception, match="403|AccessDenied|Forbidden|signature"): + conn.execute( + f"SELECT * FROM read_csv_auto('s3://{test_bucket}/results/output_0.csv')" + ).fetchall() + + conn.close() + + def test_nonexistent_bucket_clear_error(self, minio_conn): + """Reading from a missing bucket should raise a clear error.""" + with pytest.raises(Exception, match="404|NoSuchBucket|NoSuchKey|not found"): + minio_conn.execute( + "SELECT * FROM read_csv_auto(" + "'s3://this-bucket-does-not-exist/file.csv')" + ).fetchall() + + def test_namespace_isolation( + self, minio_registry, seed_csv, test_bucket + ): + """Two run_hashes should not leak data into each other.""" + from joshpy.cell_data import CellDataLoader + from joshpy.jobs import JobConfig + + config = JobConfig( + source_path=Path("/tmp/sim.josh"), + simulation="Main", + replicates=1, + ) + session_id = minio_registry.create_session( + config=config, experiment_name="isolation-test" + ) + + # Register two runs + for rh in ("hash_AAA", "hash_BBB"): + minio_registry.register_run( + session_id=session_id, + run_hash=rh, + josh_path="/tmp/sim.josh", + config_content="test", + file_mappings=None, + parameters={}, + ) + + run_id_a = minio_registry.start_run("hash_AAA", session_id=session_id) + minio_registry.complete_run(run_id_a, exit_code=0) + run_id_b = minio_registry.start_run("hash_BBB", session_id=session_id) + minio_registry.complete_run(run_id_b, exit_code=0) + + # Seed different CSVs + prefix = f"test-isolation/{uuid.uuid4().hex[:8]}" + csv_a = "step,replicate,position.x,position.y,val\n0,0,0.0,0.0,111\n" + csv_b = "step,replicate,position.x,position.y,val\n0,0,0.0,0.0,999\n" + url_a = seed_csv(f"{prefix}/a.csv", csv_a) + url_b = seed_csv(f"{prefix}/b.csv", csv_b) + + loader = CellDataLoader(minio_registry) + loader.load_csv(csv_path=url_a, run_id=run_id_a, run_hash="hash_AAA") + 
loader.load_csv(csv_path=url_b, run_id=run_id_b, run_hash="hash_BBB") + + # Query by hash — should be isolated + val_a = minio_registry.conn.execute( + 'SELECT val FROM cell_data WHERE run_hash = ?', ["hash_AAA"] + ).fetchone()[0] + val_b = minio_registry.conn.execute( + 'SELECT val FROM cell_data WHERE run_hash = ?', ["hash_BBB"] + ).fetchone()[0] + + assert val_a == 111 + assert val_b == 999 diff --git a/tests/test_sweep.py b/tests/test_sweep.py index 092839c..5c2df1a 100644 --- a/tests/test_sweep.py +++ b/tests/test_sweep.py @@ -1008,5 +1008,273 @@ def test_with_label_on_collision_timestamp(self): registry.close() +class TestIngestResults(unittest.TestCase): + """Tests for ingest_results().""" + + def _make_registry_with_run(self, replicates=3): + """Create an in-memory registry with a labeled run for testing.""" + registry = RunRegistry(":memory:") + config = JobConfig( + source_path=Path("/tmp/sim.josh"), + simulation="Main", + replicates=replicates, + ) + session_id = registry.create_session( + config=config, + experiment_name="test", + ) + # Register a config + registry.register_run( + session_id=session_id, + run_hash="abc123def456", + josh_path="/tmp/sim.josh", + config_content="config_here", + file_mappings=None, + parameters={"maxGrowth": 50}, + josh_content="simulation Main { }", + ) + registry.label_run("abc123def456", "test-label") + + # Start runs so _resolve_run_id_for_hash works and replicate count is right + run_id = None + for _ in range(replicates): + run_id = registry.start_run("abc123def456", session_id=session_id) + registry.complete_run(run_id, exit_code=0) + + return registry, session_id, run_id + + @patch("joshpy.sweep.CellDataLoader") + def test_local_file_protocol(self, mock_loader_cls): + """ingest_results with file:// protocol loads local CSVs.""" + from joshpy.sweep import ingest_results + from joshpy.cli import ExportFileInfo, ExportPaths + + registry, _, run_id = self._make_registry_with_run() + + mock_loader = MagicMock() + 
mock_loader.load_csv.return_value = 100 + mock_loader_cls.return_value = mock_loader + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw="file:///tmp/output_{replicate}.csv", + protocol="file", + host="", + path="/tmp/output_{replicate}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={"organism": None, "patch": None, "agent": None, "disturbance": None}, + ) + + # Create fake CSV files + import tempfile, os + with tempfile.TemporaryDirectory() as tmpdir: + for rep in range(3): + csv_path = Path(f"/tmp/output_{rep}.csv") + csv_path.write_text("step,replicate,val\n0,0,1.0\n") + + try: + rows = ingest_results(mock_cli, registry, "test-label", quiet=True) + # Should have called load_csv 3 times + self.assertEqual(mock_loader.load_csv.call_count, 3) + finally: + for rep in range(3): + Path(f"/tmp/output_{rep}.csv").unlink(missing_ok=True) + + registry.close() + + @patch("joshpy.sweep.CellDataLoader") + def test_missing_replicate_skipped(self, mock_loader_cls): + """Missing CSVs should be skipped gracefully.""" + from joshpy.sweep import ingest_results + from joshpy.cli import ExportFileInfo, ExportPaths + + registry, _, _ = self._make_registry_with_run() + + mock_loader = MagicMock() + mock_loader.load_csv.side_effect = FileNotFoundError("not found") + mock_loader_cls.return_value = mock_loader + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw="file:///tmp/missing_{replicate}.csv", + protocol="file", + host="", + path="/tmp/missing_{replicate}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={"organism": None, "patch": None, "agent": None, "disturbance": None}, + ) + + rows = ingest_results(mock_cli, registry, "test-label", quiet=True) + self.assertEqual(rows, 0) + registry.close() + + def 
test_unknown_label_raises(self): + """ingest_results should raise KeyError for unknown label.""" + from joshpy.sweep import ingest_results + + registry = RunRegistry(":memory:") + mock_cli = MagicMock() + + with self.assertRaises(KeyError): + ingest_results(mock_cli, registry, "nonexistent-label") + registry.close() + + @patch("joshpy.sweep.CellDataLoader") + def test_minio_protocol_configures_s3(self, mock_loader_cls): + """minio:// protocol should call configure_s3 and build s3:// URLs.""" + from joshpy.sweep import ingest_results + from joshpy.cli import ExportFileInfo, ExportPaths + + registry, _, _ = self._make_registry_with_run() + + mock_loader = MagicMock() + mock_loader.load_csv.return_value = 50 + mock_loader_cls.return_value = mock_loader + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw="minio://my-bucket/results/output_{replicate}.csv", + protocol="minio", + host="my-bucket", + path="/results/output_{replicate}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={"organism": None, "patch": None, "agent": None, "disturbance": None}, + ) + + env = { + "MINIO_ENDPOINT": "storage.example.com", + "MINIO_ACCESS_KEY": "AKID", + "MINIO_SECRET_KEY": "SECRET", + } + with patch("joshpy.sweep.configure_s3") as mock_configure, \ + patch.dict("os.environ", env): + rows = ingest_results(mock_cli, registry, "test-label", quiet=True) + + # Should have configured S3 + mock_configure.assert_called_once() + call_args = mock_configure.call_args + self.assertEqual(call_args[0][1], "storage.example.com") + + # load_csv should have been called with s3:// URLs + for call in mock_loader.load_csv.call_args_list: + csv_arg = call[1].get("csv_path") or call[0][0] + self.assertTrue(str(csv_arg).startswith("s3://my-bucket/")) + + registry.close() + + @patch("joshpy.sweep.CellDataLoader") + def test_minio_missing_creds_raises(self, mock_loader_cls): + 
"""minio:// without env vars should raise RuntimeError.""" + from joshpy.sweep import ingest_results + from joshpy.cli import ExportFileInfo, ExportPaths + + registry, _, _ = self._make_registry_with_run() + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw="minio://bucket/out_{replicate}.csv", + protocol="minio", + host="bucket", + path="/out_{replicate}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={"organism": None, "patch": None, "agent": None, "disturbance": None}, + ) + + # Clear any minio env vars + clean_env = {k: v for k, v in __import__("os").environ.items() + if not k.startswith("MINIO_")} + with patch.dict("os.environ", clean_env, clear=True): + with self.assertRaises(RuntimeError): + ingest_results(mock_cli, registry, "test-label", quiet=True) + + registry.close() + + @patch("joshpy.sweep.CellDataLoader") + def test_josh_content_fallback(self, mock_loader_cls): + """Should use josh_content from registry when josh_path doesn't exist.""" + from joshpy.sweep import ingest_results + from joshpy.cli import ExportFileInfo, ExportPaths + + registry, _, _ = self._make_registry_with_run() + + mock_loader = MagicMock() + mock_loader.load_csv.return_value = 10 + mock_loader_cls.return_value = mock_loader + + mock_cli = MagicMock() + mock_cli.inspect_exports.return_value = ExportPaths( + simulation="Main", + export_files={ + "patch": ExportFileInfo( + raw="file:///tmp/out_{replicate}.csv", + protocol="file", + host="", + path="/tmp/out_{replicate}.csv", + file_type="csv", + ), + "meta": None, + "entity": None, + }, + debug_files={"organism": None, "patch": None, "agent": None, "disturbance": None}, + ) + + # josh_path is /tmp/sim.josh which doesn't exist — should fall back to josh_content + rows = ingest_results(mock_cli, registry, "test-label", quiet=True) + + # inspect_exports should have been called with a temp file (not 
/tmp/sim.josh) + call_config = mock_cli.inspect_exports.call_args[0][0] + self.assertNotEqual(str(call_config.script), "/tmp/sim.josh") + # Temp file has .josh suffix + self.assertTrue(str(call_config.script).endswith(".josh")) + + registry.close() + + +class TestConfigureS3(unittest.TestCase): + """Tests for configure_s3().""" + + def test_executes_install_and_create_secret(self): + """configure_s3 should call INSTALL httpfs and CREATE SECRET.""" + from joshpy.registry import configure_s3 + + mock_conn = MagicMock() + configure_s3(mock_conn, "storage.example.com", "AKID", "SECRET") + + # Should have called execute twice: INSTALL + CREATE SECRET + self.assertEqual(mock_conn.execute.call_count, 2) + first_call = mock_conn.execute.call_args_list[0] + self.assertIn("INSTALL httpfs", first_call[0][0]) + second_call = mock_conn.execute.call_args_list[1] + self.assertIn("CREATE OR REPLACE SECRET", second_call[0][0]) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_targets.py b/tests/test_targets.py new file mode 100644 index 0000000..9791ef6 --- /dev/null +++ b/tests/test_targets.py @@ -0,0 +1,406 @@ +"""Tests for joshpy.targets — target profile system.""" + +import json +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from joshpy.targets import ( + HttpTargetConfig, + KubernetesTargetConfig, + ResolvedMinioCreds, + TargetProfile, + _from_json_dict, + _to_json_dict, + delete_target, + list_targets, + load_target, + resolve_minio_creds, + save_target, +) + + +# ----------------------------------------------------------------------- +# Dataclass construction and validation +# ----------------------------------------------------------------------- + + +class TestHttpTargetConfig(unittest.TestCase): + def test_required_fields(self): + cfg = HttpTargetConfig(endpoint="https://example.com") + self.assertEqual(cfg.endpoint, "https://example.com") + + def test_defaults(self): + cfg = 
HttpTargetConfig(endpoint="https://example.com") + self.assertIsNone(cfg.api_key) + + def test_frozen(self): + cfg = HttpTargetConfig(endpoint="https://example.com") + with self.assertRaises(AttributeError): + cfg.endpoint = "other" # type: ignore[misc] + + +class TestKubernetesTargetConfig(unittest.TestCase): + def test_required_fields(self): + cfg = KubernetesTargetConfig(namespace="josh", image="img:latest") + self.assertEqual(cfg.namespace, "josh") + self.assertEqual(cfg.image, "img:latest") + + def test_defaults(self): + cfg = KubernetesTargetConfig(namespace="josh", image="img:latest") + self.assertIsNone(cfg.context) + self.assertIsNone(cfg.pod_minio_endpoint) + self.assertEqual(cfg.resources, {}) + self.assertIsNone(cfg.parallelism) + self.assertIsNone(cfg.timeout_seconds) + self.assertIsNone(cfg.ttl_seconds_after_finished) + self.assertFalse(cfg.spot) + + def test_resources_dict_isolation(self): + a = KubernetesTargetConfig(namespace="a", image="a") + b = KubernetesTargetConfig(namespace="b", image="b") + self.assertIsNot(a.resources, b.resources) + + def test_frozen(self): + cfg = KubernetesTargetConfig(namespace="josh", image="img:latest") + with self.assertRaises(AttributeError): + cfg.namespace = "other" # type: ignore[misc] + + +class TestTargetProfile(unittest.TestCase): + def test_http_profile(self): + p = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://example.com"), + ) + self.assertEqual(p.target_type, "http") + self.assertIsNotNone(p.http) + self.assertIsNone(p.kubernetes) + + def test_k8s_profile(self): + p = TargetProfile( + target_type="kubernetes", + kubernetes=KubernetesTargetConfig(namespace="ns", image="img"), + ) + self.assertEqual(p.target_type, "kubernetes") + self.assertIsNone(p.http) + self.assertIsNotNone(p.kubernetes) + + def test_http_requires_config(self): + with self.assertRaises(ValueError, msg="http config required"): + TargetProfile(target_type="http") + + def test_k8s_requires_config(self): + 
with self.assertRaises(ValueError, msg="kubernetes config required"): + TargetProfile(target_type="kubernetes") + + def test_minio_fields(self): + p = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://example.com"), + minio_endpoint="https://storage.googleapis.com", + minio_bucket="my-bucket", + ) + self.assertEqual(p.minio_endpoint, "https://storage.googleapis.com") + self.assertEqual(p.minio_bucket, "my-bucket") + self.assertIsNone(p.minio_access_key) + self.assertIsNone(p.minio_secret_key) + + +# ----------------------------------------------------------------------- +# Serialization helpers +# ----------------------------------------------------------------------- + + +class TestSerialization(unittest.TestCase): + def test_to_json_renames_keys(self): + cfg = HttpTargetConfig(endpoint="https://ex.com", api_key="secret") + d = _to_json_dict(cfg) + self.assertIn("apiKey", d) + self.assertNotIn("api_key", d) + self.assertEqual(d["apiKey"], "secret") + + def test_to_json_omits_none(self): + cfg = HttpTargetConfig(endpoint="https://ex.com") + d = _to_json_dict(cfg) + self.assertNotIn("apiKey", d) + self.assertNotIn("api_key", d) + + def test_from_json_renames_keys(self): + d = _from_json_dict({"apiKey": "secret", "endpoint": "https://ex.com"}) + self.assertIn("api_key", d) + self.assertNotIn("apiKey", d) + self.assertEqual(d["api_key"], "secret") + + def test_roundtrip_k8s(self): + original = KubernetesTargetConfig( + namespace="ns", + image="img:latest", + context="gke_proj_region_cluster", + pod_minio_endpoint="https://storage.googleapis.com", + resources={"requests": {"cpu": "1", "memory": "2Gi"}}, + parallelism=5, + timeout_seconds=600, + ttl_seconds_after_finished=3600, + spot=True, + ) + json_dict = _to_json_dict(original) + python_dict = _from_json_dict(json_dict) + restored = KubernetesTargetConfig(**python_dict) + self.assertEqual(restored, original) + + +# ----------------------------------------------------------------------- 
+# Save / load round-trips (filesystem) +# ----------------------------------------------------------------------- + + +class TestSaveLoadTarget(unittest.TestCase): + def test_save_creates_directory(self): + with tempfile.TemporaryDirectory() as tmpdir: + target_dir = Path(tmpdir) / "nested" / "targets" + with patch("joshpy.targets.TARGETS_DIR", target_dir): + profile = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + ) + path = save_target("test", profile) + self.assertTrue(path.exists()) + self.assertTrue(target_dir.is_dir()) + + def test_roundtrip_http(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + original = TargetProfile( + target_type="http", + http=HttpTargetConfig( + endpoint="https://josh.example.com", + api_key="sk-123", + ), + minio_endpoint="https://storage.googleapis.com", + minio_bucket="josh-bucket", + ) + save_target("cloud-dev", original) + loaded = load_target("cloud-dev") + self.assertEqual(loaded.target_type, "http") + self.assertEqual(loaded.http, original.http) + self.assertEqual(loaded.minio_endpoint, original.minio_endpoint) + self.assertEqual(loaded.minio_bucket, original.minio_bucket) + + def test_roundtrip_k8s(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + original = TargetProfile( + target_type="kubernetes", + kubernetes=KubernetesTargetConfig( + namespace="joshsim", + image="ghcr.io/schmidtdse/joshsim:latest", + context="gke_proj_us-west1_cluster", + pod_minio_endpoint="https://storage.googleapis.com", + resources={ + "requests": {"cpu": "1", "memory": "2Gi"}, + "limits": {"memory": "4Gi"}, + }, + parallelism=5, + timeout_seconds=600, + ttl_seconds_after_finished=3600, + spot=True, + ), + minio_endpoint="https://storage.googleapis.com", + minio_bucket="josh-bucket", + ) + save_target("gke-test", original) + loaded = load_target("gke-test") + 
self.assertEqual(loaded.target_type, "kubernetes") + self.assertEqual(loaded.kubernetes, original.kubernetes) + self.assertEqual(loaded.minio_endpoint, original.minio_endpoint) + + def test_roundtrip_minio_creds(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + original = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + minio_endpoint="https://storage.googleapis.com", + minio_access_key="GOOG123", + minio_secret_key="secret456", + minio_bucket="my-bucket", + ) + save_target("creds-test", original) + loaded = load_target("creds-test") + self.assertEqual(loaded.minio_access_key, "GOOG123") + self.assertEqual(loaded.minio_secret_key, "secret456") + + def test_load_nonexistent_raises(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + with self.assertRaises(FileNotFoundError): + load_target("does-not-exist") + + def test_json_uses_correct_keys(self): + """Verify the raw JSON matches the format joshsim expects.""" + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + profile = TargetProfile( + target_type="kubernetes", + kubernetes=KubernetesTargetConfig( + namespace="ns", + image="img", + timeout_seconds=600, + ttl_seconds_after_finished=3600, + ), + minio_endpoint="https://storage.googleapis.com", + minio_bucket="bucket", + ) + path = save_target("keys-test", profile) + raw = json.loads(path.read_text()) + + # Top-level keys + self.assertEqual(raw["type"], "kubernetes") + self.assertEqual(raw["minio_endpoint"], "https://storage.googleapis.com") + self.assertEqual(raw["minio_bucket"], "bucket") + + # Nested K8s keys — camelCase where Java expects it + k8s = raw["kubernetes"] + self.assertIn("timeoutSeconds", k8s) + self.assertIn("ttlSecondsAfterFinished", k8s) + # snake_case fields stay snake_case + self.assertNotIn("timeout_seconds", k8s) + 
self.assertNotIn("ttl_seconds_after_finished", k8s) + + def test_invalid_name_rejected(self): + with self.assertRaises(ValueError, msg="Invalid target name"): + save_target("../etc/passwd", TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + )) + + with self.assertRaises(ValueError): + save_target("has.dot", TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + )) + + with self.assertRaises(ValueError): + save_target("has spaces", TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + )) + + +# ----------------------------------------------------------------------- +# List / delete +# ----------------------------------------------------------------------- + + +class TestListDeleteTargets(unittest.TestCase): + def test_empty_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + self.assertEqual(list_targets(), []) + + def test_multiple_targets_sorted(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + for name in ("zebra", "alpha", "middle"): + save_target(name, TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + )) + self.assertEqual(list_targets(), ["alpha", "middle", "zebra"]) + + def test_no_dir_exists(self): + with patch("joshpy.targets.TARGETS_DIR", Path("/tmp/nonexistent-josh-targets-abc")): + self.assertEqual(list_targets(), []) + + def test_delete_existing(self): + with tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + save_target("to-delete", TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + )) + self.assertIn("to-delete", list_targets()) + delete_target("to-delete") + self.assertNotIn("to-delete", list_targets()) + + def test_delete_nonexistent_raises(self): + with 
tempfile.TemporaryDirectory() as tmpdir: + with patch("joshpy.targets.TARGETS_DIR", Path(tmpdir)): + with self.assertRaises(FileNotFoundError): + delete_target("nope") + + +# ----------------------------------------------------------------------- +# Credential resolution +# ----------------------------------------------------------------------- + + +class TestResolveMinioCredentials(unittest.TestCase): + def test_no_target_uses_env(self): + env = { + "MINIO_ENDPOINT": "https://env.example.com", + "MINIO_ACCESS_KEY": "env-ak", + "MINIO_SECRET_KEY": "env-sk", + "MINIO_BUCKET": "env-bucket", + } + with patch.dict(os.environ, env, clear=False): + creds = resolve_minio_creds() + self.assertEqual(creds.endpoint, "https://env.example.com") + self.assertEqual(creds.access_key, "env-ak") + self.assertEqual(creds.secret_key, "env-sk") + self.assertEqual(creds.bucket, "env-bucket") + + def test_profile_overrides_env(self): + env = { + "MINIO_ENDPOINT": "https://env.example.com", + "MINIO_BUCKET": "env-bucket", + } + profile = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + minio_endpoint="https://profile.example.com", + minio_bucket="profile-bucket", + ) + with patch.dict(os.environ, env, clear=False): + creds = resolve_minio_creds(profile) + self.assertEqual(creds.endpoint, "https://profile.example.com") + self.assertEqual(creds.bucket, "profile-bucket") + + def test_partial_merge(self): + env = { + "MINIO_ACCESS_KEY": "env-ak", + "MINIO_SECRET_KEY": "env-sk", + } + profile = TargetProfile( + target_type="http", + http=HttpTargetConfig(endpoint="https://ex.com"), + minio_endpoint="https://profile.example.com", + minio_bucket="profile-bucket", + ) + with patch.dict(os.environ, env, clear=False): + creds = resolve_minio_creds(profile) + self.assertEqual(creds.endpoint, "https://profile.example.com") + self.assertEqual(creds.access_key, "env-ak") + self.assertEqual(creds.secret_key, "env-sk") + self.assertEqual(creds.bucket, 
"profile-bucket") + + def test_no_source_returns_none(self): + with patch.dict(os.environ, {}, clear=True): + creds = resolve_minio_creds() + self.assertIsNone(creds.endpoint) + self.assertIsNone(creds.access_key) + self.assertIsNone(creds.secret_key) + self.assertIsNone(creds.bucket) + + def test_none_target_uses_env(self): + env = {"MINIO_ENDPOINT": "https://env.example.com"} + with patch.dict(os.environ, env, clear=False): + creds = resolve_minio_creds(target=None) + self.assertEqual(creds.endpoint, "https://env.example.com") + + +if __name__ == "__main__": + unittest.main()