diff --git a/modules/accelerators/cdf_common/functions/write.Function.yaml b/modules/accelerators/cdf_common/functions/write.Function.yaml index 1c957c17..3126698e 100644 --- a/modules/accelerators/cdf_common/functions/write.Function.yaml +++ b/modules/accelerators/cdf_common/functions/write.Function.yaml @@ -2,5 +2,6 @@ name: Annotation to Direct Relations Writer externalId: contextualization_connection_writer owner: Anonymous description: Writes all approved Annotations as direct relations connections. +runtime: py312 functionPath: handler.py space: {{ functionSpace }} diff --git a/modules/accelerators/cdf_ingestion/default.config.yaml b/modules/accelerators/cdf_ingestion/default.config.yaml index 6ba7cf4f..ea428691 100644 --- a/modules/accelerators/cdf_ingestion/default.config.yaml +++ b/modules/accelerators/cdf_ingestion/default.config.yaml @@ -14,7 +14,7 @@ instanceSpaces: - springfield_instances - cdf_cdm_units -runWorkflowSchedule: "0 0 29 2 *" +runWorkflowSchedule: "0 3 * * MON" timeseriesTransformationExternalId: pi_timeseries_springfield_aveva_pi assetTransformationExternalId: sap_assets_springfield_s4hana equipmentTransformationExternalId: sap_equipment_springfield_s4hana diff --git a/modules/accelerators/contextualization/cdf_entity_matching/raw/entity_matching_db.Database.yaml b/modules/accelerators/contextualization/cdf_entity_matching/raw/entity_matching_db.Database.yaml new file mode 100644 index 00000000..08202ab8 --- /dev/null +++ b/modules/accelerators/contextualization/cdf_entity_matching/raw/entity_matching_db.Database.yaml @@ -0,0 +1 @@ +dbName: {{ dbName }} \ No newline at end of file diff --git a/modules/accelerators/contextualization/cdf_entity_matching/workflows/trigger.WorkflowTrigger.yaml b/modules/accelerators/contextualization/cdf_entity_matching/workflows/trigger.WorkflowTrigger.yaml index 5c5600f3..d139655f 100644 --- a/modules/accelerators/contextualization/cdf_entity_matching/workflows/trigger.WorkflowTrigger.yaml +++ 
b/modules/accelerators/contextualization/cdf_entity_matching/workflows/trigger.WorkflowTrigger.yaml @@ -1,7 +1,7 @@ externalId: {{ workflow }}_trigger triggerRule: triggerType: schedule - cronExpression: "0 4 * * *" + cronExpression: "0 5 * * MON" workflowExternalId: {{ workflow }} workflowVersion: v1 authentication: diff --git a/modules/accelerators/contextualization/cdf_file_annotation/data_modeling/containers/hdm.container.yaml b/modules/accelerators/contextualization/cdf_file_annotation/data_modeling/containers/hdm.container.yaml index d5831152..137512be 100644 --- a/modules/accelerators/contextualization/cdf_file_annotation/data_modeling/containers/hdm.container.yaml +++ b/modules/accelerators/contextualization/cdf_file_annotation/data_modeling/containers/hdm.container.yaml @@ -1,7 +1,6 @@ - constraints: {} description: Container for file contextualization pipeline annotation data externalId: {{ annotationStateExternalId }} - indexes: {} name: {{ annotationStateExternalId }} properties: annotatedPageCount: @@ -13,9 +12,6 @@ type: int64 name: Annotated page count annotationMessage: - autoIncrement: false - immutable: false - nullable: true autoIncrement: false immutable: false nullable: true diff --git a/modules/accelerators/contextualization/cdf_file_annotation/default.config.yaml b/modules/accelerators/contextualization/cdf_file_annotation/default.config.yaml index abfcd9e5..642798bf 100644 --- a/modules/accelerators/contextualization/cdf_file_annotation/default.config.yaml +++ b/modules/accelerators/contextualization/cdf_file_annotation/default.config.yaml @@ -45,7 +45,7 @@ promoteFunctionVersion: v1.0.0 # used in /workflows (single v1 version runs: prepare โ†’ launch โ†’ finalize โ†’ promote) workflowExternalId: wf_file_annotation -workflowSchedule: "0 0 29 2 *" +workflowSchedule: "0 5 * * MON" # used in /transformations fileToAssetTransformationExternalId: tr_file_to_asset_from_annotations diff --git 
a/modules/accelerators/contextualization/cdf_file_annotation/raw/rawDb.Database.yaml b/modules/accelerators/contextualization/cdf_file_annotation/raw/rawDb.Database.yaml new file mode 100644 index 00000000..81727ee6 --- /dev/null +++ b/modules/accelerators/contextualization/cdf_file_annotation/raw/rawDb.Database.yaml @@ -0,0 +1 @@ +dbName: {{ rawDb }} \ No newline at end of file diff --git a/modules/accelerators/open_industrial_data_sync/default.config.yaml b/modules/accelerators/open_industrial_data_sync/default.config.yaml index 7c59255e..980d508e 100644 --- a/modules/accelerators/open_industrial_data_sync/default.config.yaml +++ b/modules/accelerators/open_industrial_data_sync/default.config.yaml @@ -8,7 +8,7 @@ extractionPipelineExternalId: ep_oid_sync functionExternalId: fn_oid_sync functionSpace: springfield_functions scheduleExternalId: schedule_oid_sync -scheduleCronExpression: "0 0 29 2 *" +scheduleCronExpression: "*/10 * * * *" openIdClientSecret: ${OPEN_ID_CLIENT_SECRET} schemaSpace: sp_enterprise_process_industry datamodelVersion: v1 diff --git a/modules/accelerators/open_industrial_data_sync/functions/fn_oid_sync.Function.yaml b/modules/accelerators/open_industrial_data_sync/functions/fn_oid_sync.Function.yaml index 0cbe110d..c617e64e 100644 --- a/modules/accelerators/open_industrial_data_sync/functions/fn_oid_sync.Function.yaml +++ b/modules/accelerators/open_industrial_data_sync/functions/fn_oid_sync.Function.yaml @@ -5,8 +5,7 @@ functionPath: handler.py secrets: oid-secret: "{{openIdClientSecret}}" - cpu: 1.0 - memory: 1.5 metadata: version: "v1.0.0" space: {{ functionSpace }} + runtime: py312 diff --git a/modules/accelerators/quickstart/README.md b/modules/accelerators/quickstart/README.md new file mode 100644 index 00000000..8942241c --- /dev/null +++ b/modules/accelerators/quickstart/README.md @@ -0,0 +1,221 @@ +# Quickstart Deployment Pack + +This module provides a consolidated deployment and validation path for the Quickstart Deployment Pack 
(`dp:quickstart`), combining ingestion, contextualization, search, model, and quality reporting modules into one end-to-end setup. + +## Why Use This Package? + +**Deploy a complete CDF demo/reference pipeline with one package selection** + +Configuring all dependent Quickstart modules manually can be time-consuming and error-prone. This package provides a **single, guided path** to initialize, configure, and test a full data-to-context flow. + +**Key Benefits:** + +- โšก **Single Package Selection**: Install the entire Quickstart module set from `quickstartdp` +- ๐Ÿ”— **End-to-End Scope**: Covers source ingestion, contextualization, search, and quality reporting +- ๐Ÿงญ **Guided Setup**: Includes setup wizard for required config and SQL mode updates +- ๐Ÿงช **Synthetic Data Testing**: Validate workflows without live source integrations +- ๐Ÿ“š **Centralized Docs**: One index with links to each module README + +## ๐ŸŽฏ Overview + +Package metadata from `modules/packages.toml`: + +- `id`: `dp:quickstart` +- `title`: `Quickstart Deployment Pack` +- `canCherryPick`: `false` + +Included modules: + +- `accelerators/cdf_common` +- `accelerators/cdf_ingestion` +- `accelerators/contextualization/cdf_file_annotation` +- `accelerators/contextualization/cdf_entity_matching` +- `accelerators/contextualization/cdf_connection_sql` +- `accelerators/industrial_tools/cdf_search` +- `accelerators/open_industrial_data_sync` +- `accelerators/quickstart` +- `sourcesystem/cdf_pi` +- `sourcesystem/cdf_sap_assets` +- `sourcesystem/cdf_sap_events` +- `sourcesystem/cdf_sharepoint` +- `dashboards/rpt_quality` +- `models/qs_enterprise_dm` + +## ๐Ÿ—๏ธ Package Structure + +```text +modules/ +โ”œโ”€โ”€ accelerators/ +โ”‚ โ”œโ”€โ”€ cdf_common/ +โ”‚ โ”œโ”€โ”€ cdf_ingestion/ +โ”‚ โ”œโ”€โ”€ contextualization/ +โ”‚ โ”‚ โ”œโ”€โ”€ cdf_file_annotation/ +โ”‚ โ”‚ โ”œโ”€โ”€ cdf_entity_matching/ +โ”‚ โ”‚ โ””โ”€โ”€ cdf_connection_sql/ +โ”‚ โ”œโ”€โ”€ industrial_tools/cdf_search/ +โ”‚ โ”œโ”€โ”€ 
open_industrial_data_sync/ +โ”‚ โ””โ”€โ”€ quickstart/ +โ”œโ”€โ”€ sourcesystem/ +โ”‚ โ”œโ”€โ”€ cdf_pi/ +โ”‚ โ”œโ”€โ”€ cdf_sap_assets/ +โ”‚ โ”œโ”€โ”€ cdf_sap_events/ +โ”‚ โ””โ”€โ”€ cdf_sharepoint/ +โ”œโ”€โ”€ dashboards/rpt_quality/ +โ””โ”€โ”€ models/qs_enterprise_dm/ +``` + +## ๐Ÿ“š Module Documentation Index + +### Foundation + + +| Module | Purpose | Documentation | +| ------------------ | -------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| `cdf_common` | Shared spaces, datasets, RAW, and common runtime resources | `[modules/accelerators/cdf_common/README.md](../cdf_common/README.md)` | +| `cdf_ingestion` | Ingestion workflow orchestration and transformation sequencing | `[modules/accelerators/cdf_ingestion/README.md](../cdf_ingestion/README.md)` | +| `qs_enterprise_dm` | Quickstart enterprise data model | `[modules/models/qs_enterprise_dm/README.md](../../models/qs_enterprise_dm/README.md)` | + + +### Source system + data simulation + + +| Module | Purpose | Documentation | +| --------------------------- | ---------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| `cdf_pi` | PI sample ingestion and timeseries setup | `[modules/sourcesystem/cdf_pi/README.md](../../sourcesystem/cdf_pi/README.md)` | +| `cdf_sap_assets` | SAP asset/functional location ingestion | `[modules/sourcesystem/cdf_sap_assets/README.md](../../sourcesystem/cdf_sap_assets/README.md)` | +| `cdf_sap_events` | SAP maintenance event ingestion | `[modules/sourcesystem/cdf_sap_events/README.md](../../sourcesystem/cdf_sap_events/README.md)` | +| `cdf_sharepoint` | File ingestion for annotation testing | `[modules/sourcesystem/cdf_sharepoint/README.md](../../sourcesystem/cdf_sharepoint/README.md)` | +| `open_industrial_data_sync` | Time-shifted OID replay and sync | 
`[modules/accelerators/open_industrial_data_sync/README.md](../open_industrial_data_sync/README.md)` | + + +### Contextualization + + +| Module | Purpose | Documentation | +| --------------------- | ---------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| `cdf_connection_sql` | SQL-based relationship creation | `[modules/accelerators/contextualization/cdf_connection_sql/README.md](../contextualization/cdf_connection_sql/README.md)` | +| `cdf_entity_matching` | Entity matching and metadata optimization | `[modules/accelerators/contextualization/cdf_entity_matching/README.md](../contextualization/cdf_entity_matching/README.md)` | +| `cdf_file_annotation` | File annotation workflow (prepare/launch/finalize/promote) | `[modules/accelerators/contextualization/cdf_file_annotation/README.md](../contextualization/cdf_file_annotation/README.md)` | + + +### Monitoring and industrial tools + + +| Module | Purpose | Documentation | +| ------------- | ---------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| `cdf_search` | Search/location resources for Industrial Tools | `[modules/accelerators/industrial_tools/cdf_search/README.md](../industrial_tools/cdf_search/README.md)` | +| `rpt_quality` | Contextualization quality KPI reporting | `[modules/dashboards/rpt_quality/README.md](../../dashboards/rpt_quality/README.md)` | + + +## ๐Ÿ”ง Configuration + +### Prerequisites + +- Cognite Toolkit `0.7.33` or newer +- `cdf.toml` present in project root +- Auth initialized and verified (`cdf auth init`, `cdf auth verify`) +- Data plugin enabled: + +```toml +[plugins] +data = true +``` + +- Library source configured: + +```toml +[library.cognite] +url = "https://github.com/cognitedata/library/releases/download/latest/packages.zip" +``` + +## ๐Ÿƒ Getting 
Started + +### 1. Initialize package modules + +```bash +cdf modules init . --clean +``` + +Select **Quickstart Deployment Pack** in the module picker. + +> `--clean` can overwrite existing module folders. + +### 2. Run setup wizard (recommended) + +```bash +python3 modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py --env dev +``` + +The wizard updates required Quickstart settings for the selected environment (`dev`, `prod`, or `staging`) in: + +- `config.<env>.yaml` +- `.env` +- `modules/sourcesystem/cdf_sap_assets/transformations/population/asset.Transformation.sql` + +It also creates a timestamped backup of each file before overwriting it. + +### 3. Verify generated changes + +Confirm: + +- `environment.project` is correct +- Entity matching defaults are updated for Quickstart +- `ApplicationOwner` is set +- group source and secret values exist in `.env` +- `FILE_ANNOTATION MODE` is active in `asset.Transformation.sql` + +> If you run `cdf auth init` after the wizard, re-check `.env` values before deploy. + +## 🧪 Testing the Quickstart Package + +QS DP includes synthetic data for full validation without live source connectors. + +### 1. Build and deploy + +```bash +cdf build +cdf deploy --dry-run +cdf deploy +``` + +### 2. Upload synthetic data + +```bash +cdf data upload dir modules/sourcesystem/cdf_pi/upload_data +cdf data upload dir modules/sourcesystem/cdf_sap_assets/upload_data +cdf data upload dir modules/sourcesystem/cdf_sap_events/upload_data +cdf data upload dir modules/sourcesystem/cdf_sharepoint/upload_data +cdf data upload dir modules/accelerators/contextualization/cdf_entity_matching/upload_data +cdf data upload dir modules/accelerators/contextualization/cdf_file_annotation/upload_data +``` + +If needed in test environments, add `--skip-verify-cdf-project` to upload commands. + +### 3. Trigger workflows in order + +Run from Data Workflows in UI: + +1. `ingestion` +2. `wf_file_annotation` +3.
`EntityMatching` + +## โœ… Post-Deployment Verification + +- Verify file links in Industrial Tools Search (`Files`) +- Validate entity matching runs for `dm:context:timeseries:entity_matching` +- Confirm workflow runs complete successfully +- Run `wf_contextualization_rate` and review `tbl_contextualization_rate_report` in `db_quality_reports` + +## ๐Ÿ“š References + +- [Toolkit setup](https://docs.cognite.com/cdf/deploy/cdf_toolkit/guides/setup) +- [Toolkit authentication](https://docs.cognite.com/cdf/deploy/cdf_toolkit/guides/auth) +- [Entity matching module guide](https://hub.cognite.com/deployment-packs-472/how-to-cdf-entity-matching-module-cognite-official-5317) +- [Quickstart enterprise DM guide](https://hub.cognite.com/deployment-packs-472/how-to-get-started-with-quick-start-enterprise-data-model-5997) + +## ๐Ÿ†˜ Support + +- Refer to Cognite docs for Toolkit/deployment guidance +- Contact Cognite support for environment-specific issues +- Deployment packs channel: `#topic-deployment-packs` + diff --git a/modules/accelerators/quickstart/auth/.gitkeep b/modules/accelerators/quickstart/auth/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/modules/accelerators/quickstart/module.toml b/modules/accelerators/quickstart/module.toml new file mode 100644 index 00000000..fe1a6371 --- /dev/null +++ b/modules/accelerators/quickstart/module.toml @@ -0,0 +1,4 @@ +[module] +title = "Quickstart Script" +id = "dp:accelerators:quickstart_script" +package_id = "dp:accelerators" \ No newline at end of file diff --git a/modules/accelerators/quickstart/scripts/README.md b/modules/accelerators/quickstart/scripts/README.md new file mode 100644 index 00000000..f47e13e8 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/README.md @@ -0,0 +1,259 @@ +# Quickstart Deployment Pack โ€” Setup Wizard + +`qs_dp_setup_wizard.py` is an interactive command-line wizard that performs all +post-install configuration for the **Quickstart Deployment Pack** in a Cognite 
+Toolkit project. Run it once per environment after cloning the pack. + +--- + +## What it does + + +| Step | Action | +| ---- | ------------------------------------------------------------------------------------------------------------------ | +| 1 | Verifies Cognite Toolkit ≥ 0.7.210 is installed | +| 2 | Ensures `cdf.toml` has `[alpha_flags] deployment-pack = true` and `data = true` — adds missing flags automatically | +| 3 | Sets `environment.project` in `config.<env>.yaml` | +| 4 | Populates all `cdf_entity_matching` defaults required by the how-to guide | +| 5 | Sets `cdf_file_annotation.ApplicationOwner` (validated email address) | +| 6 | Wires up `groupSourceId` for all 9 modules — either one shared group or one per module | +| 7 | Ensures `GROUP_SOURCE_ID` and `OPEN_ID_CLIENT_SECRET` exist in `.env` | +| 8 | Switches `asset.Transformation.sql` from COMMON MODE to FILE_ANNOTATION MODE | +| 9 | Shows a full change table and asks for confirmation before writing any file | +| 10 | *(optional)* Runs `cdf build` + `cdf deploy --dry-run` and offers a live deploy | + + +Every file that is modified gets a timestamped backup before it is overwritten +(see [Backups and recovery](#backups-and-recovery)).
+ +--- + +## Running the wizard + +Navigate to the **root of your Toolkit project** (the directory that contains +`cdf.toml` or `modules/`), then run: + +```bash +python modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py +``` + +You can also run from inside the `scripts/` directory — the wizard resolves all +file paths from its own location, so the working directory does not matter: + +```bash +cd modules/accelerators/quickstart/scripts +python qs_dp_setup_wizard.py +``` + +To skip the environment prompt, pass it directly: + +```bash +# Configure the dev environment +python modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py --env dev + +# Configure prod, skip post-write build verification +python modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py --env prod --skip-verify +``` + +### CLI flags + + +| Flag | Description | +| -------------------------- | ----------------------------------------------------------------- | +| `--env {dev,prod,staging}` | Target environment. If omitted, you are prompted. | +| `--skip-verify` | Skip the `cdf build` / `cdf deploy --dry-run` step after writing. | + + +### What the wizard asks + +1. **CDF project name** — the project slug for `config.<env>.yaml` (e.g. `my-company-dev`). + Existing value is shown as a default. +2. **ApplicationOwner email(s)** — one or more comma-separated email addresses for the Streamlit app in + the `cdf_file_annotation` module. Validated with a regex before being accepted. +3. **Group source strategy** — choose between one shared `GROUP_SOURCE_ID` for all + modules (simpler) or a dedicated ID per module (finer-grained access control). + Existing `.env` values are shown masked (`ab****cd`) with a keep/replace choice. +4. **OpenID client secret** — `OPEN_ID_CLIENT_SECRET` in `.env`. Shown masked if + already set.
+ +Before anything is written, the wizard shows: + +- A **change table** listing every config field with its old and new value +(changed rows highlighted, unchanged rows dimmed). +- A **.env summary** listing which keys will be added or updated (values are +never printed). + +Type `n` at the final confirmation to abort without touching any file. + +--- + +## File layout + +``` +scripts/ +โ”œโ”€โ”€ qs_dp_setup_wizard.py # Entry point โ€” orchestrates all wizard steps +โ”œโ”€โ”€ pytest.ini # pytest configuration (testpaths, addopts) +โ”œโ”€โ”€ requirements.txt # Runtime and test dependencies +โ”œโ”€โ”€ README.md # This file +โ”œโ”€โ”€ wizard/ # Internal helper package (one concern per module) +โ”‚ โ”œโ”€โ”€ __init__.py # Package docstring listing all sub-modules +โ”‚ โ”œโ”€โ”€ _constants.py # All constants: versions, env-var names, YAML paths, +โ”‚ โ”‚ # SQL markers, regexes, dataclasses, module registries +โ”‚ โ”œโ”€โ”€ _messages.py # All user-facing strings: section titles, banners, +โ”‚ โ”‚ # prompt labels, static hints and status messages +โ”‚ โ”œโ”€โ”€ _file_io.py # Backups, line reads/writes, .env parsing +โ”‚ โ”œโ”€โ”€ _yaml.py # YAML path building and value mutation +โ”‚ โ”œโ”€โ”€ _prompts.py # Terminal prompts, email validation, change-table display +โ”‚ โ”œโ”€โ”€ _sql.py # SQL mode switch (COMMON โ†’ FILE_ANNOTATION) +โ”‚ โ”œโ”€โ”€ _preflight.py # Toolkit version check, cdf.toml validation, org_dir lookup +โ”‚ โ”œโ”€โ”€ _verification.py # Post-write cdf build / deploy verification +โ”‚ โ””โ”€โ”€ _style.py # ANSI terminal styling (colours off when not a TTY) +โ””โ”€โ”€ tests/ + โ”œโ”€โ”€ conftest.py # Shared pytest fixtures + โ”œโ”€โ”€ test_wizard.py # 61 unit / integration tests + โ””โ”€โ”€ fixtures/ + โ””โ”€โ”€ qs_dp/ # Minimal self-contained Toolkit project used by tests + โ”œโ”€โ”€ cdf.toml + โ””โ”€โ”€ config.dev.yaml +``` + +### Module responsibilities + + +| Module | Responsibility | +| ------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `_constants.py` | Single source of truth for every named constant โ€” version thresholds, environment variable names, YAML key paths, SQL block markers, compiled regexes, dataclasses (`GroupTarget`, `ChangeRecord`), and the `GROUP_TARGETS` / `ENTITY_MATCHING_UPDATES` registries | +| `_messages.py` | All user-visible text: section/banner titles, prompt labels, static hints and warnings. Changing wording never requires touching logic files | +| `_file_io.py` | Low-level file operations: timestamped backups, line-based reads/writes, `.env` parsing | +| `_yaml.py` | Line-based YAML parser/mutator โ€” builds key-path โ†’ line-index maps and writes individual values without disturbing comments or indentation | +| `_prompts.py` | Interactive prompts (`prompt_text`, `prompt_yes_no`, `prompt_email`), email validation, secret masking, and the pre-write change-table / `.env` summary display | +| `_sql.py` | Switches `asset.Transformation.sql` between COMMON MODE and FILE_ANNOTATION MODE by commenting/uncommenting SQL blocks | +| `_preflight.py` | Pre-flight checks: Toolkit version enforcement, `cdf.toml` alpha-flag validation (auto-adds missing flags), `organization_dir` lookup, `.gitignore` safety warning | +| `_verification.py` | Post-write `cdf build` โ†’ `cdf deploy --dry-run` โ†’ optional live deploy sequence | +| `_style.py` | ANSI colour/style helpers; auto-disables when `stdout` is not a TTY, `NO_COLOR` is set, or `TERM=dumb` | + + +--- + +## Running the tests + +### 1. Install dependencies + +```bash +cd modules/accelerators/quickstart/scripts + +pip install pytest +``` + +> **Tip:** use a virtual environment to keep things isolated. 
+> +> ```bash +> python -m venv .venv && source .venv/bin/activate +> pip install pytest +> ``` + +### 2. Run all tests + +```bash +pytest +``` + +`pytest.ini` already sets `testpaths = tests` and `addopts = -v --tb=short`, +so pytest discovers and runs everything automatically. + +### 3. Common pytest invocations + +```bash +# Stop after the first failure +pytest -x + +# Run a single test class +pytest tests/test_wizard.py::TestValidateEmails -v + +# Run a single test +pytest tests/test_wizard.py::TestEnableFileAnnotationMode::test_idempotent_second_run -v + +# Show print output even for passing tests +pytest -s +``` + +### 4. What the tests cover + + +| Test class | What is verified | +| ------------------------------ | ----------------------------------------------------------------------------------------------------------- | +| `TestEnsureBackup` | Regular files get `.bak.` suffix; `.env` gets `qs_backup_.env` name; content and original preserved | +| `TestCdfEnvArgs` | Old toolkit uses `--env=`; new toolkit uses `-c `; unknown version defaults to `-c` | +| `TestValidateEmails` | Valid/invalid single and multiple addresses, empty input, missing `@` | +| `TestSetYamlValueByPath` | Found/not-found, true no-op detection, nested paths, inline comment preserved | +| `TestEnableFileAnnotationMode` | Switches COMMON โ†’ FILE_ANNOTATION, idempotent on second run, backup file created | +| `TestParseVersion` | Plain semver, prefixed string, extra text, unparseable | +| `TestCheckToolkitVersion` | Below minimum exits 1, not found exits 1, timeout warns, unparseable warns | +| `TestCheckCdfToml` | Both flags present โ†’ no change; missing file exits; missing flags auto-added to existing or new section | +| `TestMainEarlyExits` | Unsupported env, missing config file, missing SQL file | +| `TestMainCancelPath` | Full run cancelled at confirmation โ†’ 0 exit, no files written; `KeyboardInterrupt` handled | +| `TestRunPostWriteVerification` | Build success โ†’ dry-run 
offered; build failure โ†’ stderr printed + hints; dry-run failure โ†’ no live deploy | +| `TestStripYamlQuotes` | Double-quoted, single-quoted, mismatched, empty, single char | +| `TestGetOrgDir` | Missing file, key absent, double/single-quoted value, whitespace around `=` | + + +The test suite uses `tmp_path` for all file writes and mocks `subprocess.run` +for pre-flight and post-write checks โ€” **no real CDF credentials or network +access are required**. + +--- + +## CI/CD + +The GitHub Actions workflow at `.github/workflows/qs-dp-wizard.yml` runs on +every push or pull request that touches `modules/accelerators/quickstart/scripts/`. + + +| Job | Runs on | What it does | +| --------------- | ---------------------------------------- | ----------------------------------------------------------------------------- | +| `unit-tests` | `ubuntu-latest` / Python 3.9, 3.11, 3.12 | Full pytest suite | +| `toolkit-build` | `ubuntu-latest` / Python 3.12 | Installs Toolkit 0.7.210, runs `cdf build --env=dev` against the fixture repo | + + +Both jobs must pass before a PR can be merged. + +--- + +## Backups and recovery + +Every file the wizard modifies receives a timestamped backup **before** any +write takes place (all three backups are created upfront, then all writes +follow โ€” so a partial-write failure always leaves a recoverable state). + + +| File | Backup name | +| -------------------------- | ---------------------------------------------- | +| `config..yaml` | `config..yaml.bak.YYYYMMDD-HHMMSS` | +| `.env` | `qs_backup_YYYYMMDD-HHMMSS.env` | +| `asset.Transformation.sql` | `asset.Transformation.sql.bak.YYYYMMDD-HHMMSS` | + + +> `.env` uses a different naming scheme so the backup is not auto-discovered +> by tooling that scans for dotfiles. + +Backups accumulate across runs โ€” there is no automatic pruning. 
To restore: + +```bash +cp config.dev.yaml.bak.20260421-143012 config.dev.yaml +cp qs_backup_20260421-143012.env .env +``` + +--- + +## Troubleshooting + + +| Symptom | Likely cause | Fix | +| --------------------------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------- | +| `Error: 'cdf' command not found` | Toolkit not installed or not on `$PATH` | `pip install "cognite-toolkit>=0.7.210"` | +| `Error: Toolkit X.Y.Z is below the minimum` | Outdated Toolkit | `pip install --upgrade "cognite-toolkit>=0.7.210"` | +| `Error: config file not found: config.dev.yaml` | Wrong working directory or file not created yet | Run from the project root or the `scripts/` directory; create `config.dev.yaml` first | +| `Build FAILED` after writing | Auth or config issue | Run `cdf auth verify`; check the alpha flags; inspect the `.bak` file | +| `Warning: .env does not appear to be in .gitignore` | Risk of committing secrets | Add `.env` to `.gitignore` before committing | + + diff --git a/modules/accelerators/quickstart/scripts/pytest.ini b/modules/accelerators/quickstart/scripts/pytest.ini new file mode 100644 index 00000000..d47cc874 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +addopts = -v --tb=short diff --git a/modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py b/modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py new file mode 100644 index 00000000..b176b9d6 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +""" +Interactive setup wizard for Quickstart Deployment Pack post-install edits.
+ +What it does: +1) Pre-flight: verifies Toolkit version (โ‰ฅ 0.7.210) and cdf.toml alpha flags +2) Enables FILE_ANNOTATION mode in asset.Transformation.sql +3) Updates config..yaml: + - environment.project + - cdf_entity_matching defaults from the how-to guide + - cdf_file_annotation.ApplicationOwner + - group source env references (single or module-specific) +4) Ensures required secrets exist in .env: + - GROUP_SOURCE_ID (or per-module variants) + - OPEN_ID_CLIENT_SECRET +5) (optional) Runs cdf build + cdf deploy --dry-run after writing + +Run from the Toolkit project root: + python modules/accelerators/quickstart/scripts/qs_dp_setup_wizard.py [--env dev] [--skip-verify] +""" +from __future__ import annotations + +import argparse +from pathlib import Path + +from wizard import _style as style + +# Sub-module imports (wizard/ package โ€” each file owns one concern) +from wizard._constants import ( + APP_OWNER_YAML_PATH, + ENTITY_MATCHING_UPDATES, + ENV_PROJECT_YAML_PATH, + ENV_VAR_GROUP_SOURCE_ID, + ENV_VAR_OPEN_ID_CLIENT_SECRET, + GROUP_TARGETS, + VALID_ENVIRONMENTS, + ChangeRecord, +) +from wizard._file_io import ( + ensure_backup, + parse_env_file, + read_lines, + upsert_env_var, + write_lines, +) +from wizard._messages import ( + BANNER_PENDING, + ENV_SELECT_INTRO, + GROUP_SOURCE_INTRO, + GROUP_SOURCE_PER_MODULE_HINT, + HINT_APP_OWNER_FORMAT, + HINT_BACKUPS, + HINT_CURRENT_VALUE, + HINT_SQL_PENDING, + MSG_DONE, + PROMPT_APP_OWNER, + PROMPT_APPLY, + PROMPT_PROJECT, + PROMPT_SHARED_GROUP, + SEC_APP_OWNER, + SEC_CDF_PROJECT, + SEC_GROUP_SOURCE_IDS, + SEC_OPENID_SECRET, + WARN_ABORTED, +) +from wizard._preflight import ( + _check_gitignore, + _get_org_dir, + check_cdf_toml, + check_toolkit_version, +) +from wizard._prompts import ( + mask_secret, + prompt_email, + prompt_text, + prompt_yes_no, + show_changes_table, + show_env_summary, +) +from wizard._sql import enable_file_annotation_mode +from wizard._verification import run_post_write_verification +from 
wizard._yaml import ( + _strip_yaml_quotes, + build_yaml_paths, + get_yaml_current_value, + quote_yaml_string, + set_target_view_filter_values, + set_yaml_value_by_path, +) + +# Private helpers + +def _prompt_env_var( + var: str, + env_lines: list[str], + env_key_to_line: dict[str, int], + env_values: dict[str, str], +) -> None: + """Prompt to keep or replace an .env variable; updates *env_lines* and *env_values* in place. + + Shows the masked current value when the variable already exists and asks + keep/replace. Creates the variable from scratch when absent. + """ + existing = env_values.get(var, "").strip() + if existing: + print(f" Found {var} in .env (current: {style.CYAN}{mask_secret(existing)}{style.RESET})") + if not prompt_yes_no(f" Keep existing {var}?", default=True): + new_val = prompt_text(f" New {var}") + upsert_env_var(env_lines, env_key_to_line, var, new_val) + env_values[var] = new_val + else: + style.warning(f" {var} not found in .env โ€” it will be created.") + new_val = prompt_text(f" {var}") + upsert_env_var(env_lines, env_key_to_line, var, new_val) + env_values[var] = new_val + + +def select_env(repo_root: Path, cli_env: str | None) -> str: + if cli_env: + return cli_env + print(ENV_SELECT_INTRO) + return prompt_text("Target environment", default="dev").lower() + + +def find_repo_root(start_path: Path) -> Path: + """Walk up from *start_path* looking for a directory containing both + ``cdf.toml`` and ``modules/``. + + Falls back to a path derived from this script's known position in the + project tree (``/modules/accelerators/quickstart/scripts/``) when + no ``cdf.toml`` ancestor is found โ€” e.g. when developing inside the + library repo that has no ``cdf.toml`` at its root. + + NOTE: start_path is resolved from ``__file__``, so this always finds the + Toolkit project that contains this script regardless of the current working + directory. 
+ """ + for candidate in [start_path, *start_path.parents]: + if (candidate / "cdf.toml").exists() and (candidate / "modules").exists(): + return candidate + # Fallback: the script lives at {root}/modules/accelerators/quickstart/scripts/ + # so parents[4] == {root}. + script_derived = Path(__file__).resolve().parents[4] + if (script_derived / "modules").exists(): + return script_derived + raise RuntimeError( + "Could not detect project root. Run this wizard from inside a Toolkit project " + "containing cdf.toml and modules/." + ) + + +# Main + +def main( + cli_env: str | None = None, + skip_verify: bool = False, + repo_root_override: Path | None = None, + sql_path_override: Path | None = None, +) -> int: + # --- Pre-flight --------------------------------------------------------- + toolkit_version = check_toolkit_version() + + repo_root = repo_root_override or find_repo_root(Path(__file__).resolve().parent) + check_cdf_toml(repo_root) + + env = select_env(repo_root, cli_env).strip().lower() + if env not in VALID_ENVIRONMENTS: + style.error(f" Error: unsupported environment '{env}'. Choose one of: {', '.join(sorted(VALID_ENVIRONMENTS))}.") + return 1 + + # Resolve config path — check org_dir prefix first, fall back to repo root. + org_dir = _get_org_dir(repo_root) + config_filename = f"config.{env}.yaml" + if org_dir and (repo_root / org_dir / config_filename).exists(): + config_path = repo_root / org_dir / config_filename + config_arg = f"{org_dir}/{config_filename}" + else: + config_path = repo_root / config_filename + config_arg = config_filename + + env_path = repo_root / ".env" + + # Derive sql_path from this script's absolute location so it resolves + # correctly regardless of the current working directory. + # The script lives at {root}/modules/accelerators/quickstart/scripts/, + # so parents[3] == {root}/modules/ and the SQL sits under sourcesystem/.
+ sql_path = sql_path_override or ( + Path(__file__).resolve().parents[3] + / "sourcesystem/cdf_sap_assets/transformations/population/asset.Transformation.sql" + ) + + if not config_path.exists(): + style.error( + f" Error: config file not found: {config_path}\n" + " Tip: create it first (for staging, add config.staging.yaml) and rerun." + ) + return 1 + if not sql_path.exists(): + style.error(f" Error: SQL file not found: {sql_path}") + return 1 + + print(f"\n{style.BOLD}Using config{style.RESET} : {config_path.name}") + print(f"{style.BOLD}Environment {style.RESET} : {env}") + _check_gitignore(repo_root) + + # --- Load files --------------------------------------------------------- + config_lines = read_lines(config_path) + env_lines, env_values, env_key_to_line = parse_env_file(env_path) + original_env_values = dict(env_values) + initial_key_line_map = build_yaml_paths(config_lines) + + # --- CDF project name --------------------------------------------------- + style.section(SEC_CDF_PROJECT) + current_project = get_yaml_current_value(config_lines, ENV_PROJECT_YAML_PATH, initial_key_line_map) + if current_project: + current_project = _strip_yaml_quotes(current_project) + style.hint(HINT_CURRENT_VALUE.format(value=current_project)) + plain_project_name = prompt_text(PROMPT_PROJECT, default=current_project or None) + project_name = quote_yaml_string(plain_project_name) + + # --- ApplicationOwner email(s) ------------------------------------------ + style.section(SEC_APP_OWNER) + current_app_owner = get_yaml_current_value(config_lines, APP_OWNER_YAML_PATH, initial_key_line_map) + if current_app_owner: + current_app_owner = _strip_yaml_quotes(current_app_owner) + style.hint(HINT_CURRENT_VALUE.format(value=current_app_owner)) + style.hint(HINT_APP_OWNER_FORMAT) + app_owner = quote_yaml_string( + prompt_email(PROMPT_APP_OWNER, default=current_app_owner or None) + ) + + # --- Group source strategy ---------------------------------------------- + n_modules = 
len(GROUP_TARGETS) + module_names = ", ".join(t.module.split("/")[-1] for t in GROUP_TARGETS) + style.section(SEC_GROUP_SOURCE_IDS) + print(GROUP_SOURCE_INTRO.format(n=n_modules, module_names=module_names)) + print( + f" {style.BOLD}Option A{style.RESET} : one shared group for all โ€” simpler to manage. (recommended)\n" + f" {style.BOLD}Option B{style.RESET} : one group per module โ€” finer-grained access control.\n" + f" (when choosing B you will be prompted for each of the {n_modules} modules;\n" + " modules already set in .env will show their current value with keep/replace)\n" + ) + use_same_group_everywhere = prompt_yes_no(PROMPT_SHARED_GROUP, default=True) + + group_env_by_target: dict[tuple[str, ...], str] = {} + if use_same_group_everywhere: + _prompt_env_var(ENV_VAR_GROUP_SOURCE_ID, env_lines, env_key_to_line, env_values) + for target in GROUP_TARGETS: + group_env_by_target[target.path] = ENV_VAR_GROUP_SOURCE_ID + else: + style.hint(GROUP_SOURCE_PER_MODULE_HINT) + for target in GROUP_TARGETS: + var = target.default_env_var + print(f" {style.BOLD}Module {style.RESET} : {target.module}") + print(f" {style.BOLD}Param {style.RESET} : {target.label.split('.')[-1]}") + style.hint(f" Purpose : {target.description}") + print(f" {style.BOLD}Env var{style.RESET} : {var}") + _prompt_env_var(var, env_lines, env_key_to_line, env_values) + group_env_by_target[target.path] = var + print() + + # --- OPEN_ID_CLIENT_SECRET ---------------------------------------------- + style.section(SEC_OPENID_SECRET) + _prompt_env_var(ENV_VAR_OPEN_ID_CLIENT_SECRET, env_lines, env_key_to_line, env_values) + + # --- Apply changes to in-memory lines ----------------------------------- + key_line_map = build_yaml_paths(config_lines) + records: list[ChangeRecord] = [] + + def _apply(label: str, result: tuple[str, str] | None) -> bool: + if result is None: + style.warning(f" Warning: could not find {label} in config.") + return False + old_v, new_v = result + records.append(ChangeRecord(label, 
old_v, new_v)) + return True + + _apply( + "environment.project", + set_yaml_value_by_path(config_lines, ENV_PROJECT_YAML_PATH, project_name, key_line_map), + ) + for path, value in ENTITY_MATCHING_UPDATES: + _apply( + ".".join(path[-2:]), + set_yaml_value_by_path(config_lines, path, value, key_line_map), + ) + _apply( + "cdf_entity_matching.targetViewFilterValues", + set_target_view_filter_values(config_lines, "root:ast_VAL", key_line_map), + ) + _apply( + "cdf_file_annotation.ApplicationOwner", + set_yaml_value_by_path(config_lines, APP_OWNER_YAML_PATH, app_owner, key_line_map), + ) + for target in GROUP_TARGETS: + env_var = group_env_by_target[target.path] + _apply( + target.label, + set_yaml_value_by_path( + config_lines, target.path, f"${{{env_var}}}", key_line_map + ), + ) + + # --- Show summary and confirm before writing ---------------------------- + real_changes = sum(1 for r in records if r.changed) + + style.banner(BANNER_PENDING) + + print(f"\n {style.BOLD}[{config_path.name}]{style.RESET} ({real_changes} field(s) will actually change)") + show_changes_table(records) + + print(f"\n {style.BOLD}[.env]{style.RESET}") + show_env_summary(original_env_values, env_values) + + print(f"\n {style.BOLD}[{sql_path.name}]{style.RESET}") + style.hint(HINT_SQL_PENDING) + + print() + if not prompt_yes_no(PROMPT_APPLY, default=True): + style.warning(WARN_ABORTED) + return 0 + + # --- Write files (ACID: all backups created before any write) ----------- + if env_path.exists(): + ensure_backup(env_path) + ensure_backup(config_path) + ensure_backup(sql_path) + + if env_lines: + write_lines(env_path, env_lines) + write_lines(config_path, config_lines) + sql_changed = enable_file_annotation_mode(sql_path, skip_backup=True) + + # --- Summary ------------------------------------------------------------ + style.success(MSG_DONE) + print(f" {style.BOLD}Config {style.RESET} : {config_path} ({real_changes} field(s) changed)") + print(f" {style.BOLD}.env {style.RESET} : 
{env_path}") + print( + f" {style.BOLD}SQL {style.RESET} : FILE_ANNOTATION mode " + f"{'enabled' if sql_changed else 'was already enabled'} in {sql_path.name}" + ) + style.hint(HINT_BACKUPS) + + if not skip_verify: + run_post_write_verification(repo_root, env, config_arg, toolkit_version, plain_project_name, org_dir) + + return 0 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=( + "Interactive Quickstart DP setup wizard. " + "Configures exactly one environment (dev/prod/staging) per run." + ) + ) + parser.add_argument( + "--env", + choices=sorted(VALID_ENVIRONMENTS), + help="Target environment. If omitted, you will be prompted.", + ) + parser.add_argument( + "--skip-verify", + action="store_true", + help="Skip the post-write cdf build / deploy verification step.", + ) + _args = parser.parse_args() + try: + raise SystemExit(main(cli_env=_args.env, skip_verify=_args.skip_verify)) + except KeyboardInterrupt: + print("\nCancelled by user.") + raise SystemExit(130) diff --git a/modules/accelerators/quickstart/scripts/requirements.txt b/modules/accelerators/quickstart/scripts/requirements.txt new file mode 100644 index 00000000..07220bd1 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/requirements.txt @@ -0,0 +1,2 @@ +# Development / test dependency — not needed to run the wizard itself.
+pytest>=7.0 diff --git a/modules/accelerators/quickstart/scripts/tests/__init__.py b/modules/accelerators/quickstart/scripts/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/accelerators/quickstart/scripts/tests/conftest.py b/modules/accelerators/quickstart/scripts/tests/conftest.py new file mode 100644 index 00000000..22740e37 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/tests/conftest.py @@ -0,0 +1,64 @@ +"""Shared pytest fixtures for the QS DP setup wizard tests.""" +from __future__ import annotations + +import shutil +from pathlib import Path + +import pytest + +# Import SQL marker constants so the generated fixture always stays in sync +# with the production code — no static SQL file needed. +from wizard._constants import ( + SQL_COMMON_BLOCK_ANCHOR, + SQL_COMMON_MODE_MARKER, + SQL_FILE_ANNOTATION_BLOCK_ANCHOR, + SQL_FILE_ANNOTATION_MODE_MARKER, +) + +FIXTURE_ROOT = Path(__file__).parent / "fixtures" / "qs_dp" + +# Minimal SQL content that satisfies enable_file_annotation_mode(): +# - contains both mode markers +# - contains the COMMON block anchor (active, uncommented) +# - contains the FILE_ANNOTATION block anchor (inactive, commented out) +_SQL_CONTENT = ( + f"-- {SQL_COMMON_MODE_MARKER}\n" + f"{SQL_COMMON_BLOCK_ANCHOR}\n" + f" SELECT 1\n" + f");\n" + f"\n" + f"-- {SQL_FILE_ANNOTATION_MODE_MARKER}\n" + f"-- {SQL_FILE_ANNOTATION_BLOCK_ANCHOR}\n" + f"-- SELECT 1\n" + f"-- );\n" +) + + +@pytest.fixture() +def fixture_root() -> Path: + """Path to the canonical fixture repo (read-only — tests that write use tmp_fixture_root).""" + return FIXTURE_ROOT + + +@pytest.fixture() +def tmp_fixture_root(tmp_path: Path) -> Path: + """ + A writable copy of the fixture repo in a pytest tmp_path. + Tests that call main() or mutate files should use this.
+ """ + dest = tmp_path / "qs_dp" + shutil.copytree(FIXTURE_ROOT, dest) + return dest + + +@pytest.fixture() +def fixture_config_lines() -> list[str]: + return (FIXTURE_ROOT / "config.dev.yaml").read_text(encoding="utf-8").splitlines(keepends=True) + + +@pytest.fixture() +def tmp_sql_path(tmp_path: Path) -> Path: + """Writable SQL file generated from the production marker constants.""" + path = tmp_path / "asset.Transformation.sql" + path.write_text(_SQL_CONTENT, encoding="utf-8") + return path diff --git a/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/cdf.toml b/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/cdf.toml new file mode 100644 index 00000000..03901949 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/cdf.toml @@ -0,0 +1,12 @@ +[module] +version = "1" + +[alpha_flags] +deployment-pack = true + +[plugins] +data = true + +[environment.dev] +project = "" +type = "dev" diff --git a/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/config.dev.yaml b/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/config.dev.yaml new file mode 100644 index 00000000..01f88e75 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/tests/fixtures/qs_dp/config.dev.yaml @@ -0,0 +1,38 @@ +environment: + project: placeholder-project + type: dev + +variables: + modules: + accelerators: + cdf_ingestion: + groupSourceId: ${GROUP_SOURCE_ID} + contextualization: + cdf_entity_matching: + entity_matching_processing_group_source_id: ${GROUP_SOURCE_ID} + targetViewSearchProperty: placeholder + AssetViewExternalId: placeholder + TimeSeriesViewExternalId: placeholder + targetViewExternalId: placeholder + entityViewExternalId: placeholder + targetViewFilterValues: + - placeholder + cdf_file_annotation: + groupSourceId: ${GROUP_SOURCE_ID} + ApplicationOwner: placeholder@example.com + open_industrial_data_sync: + groupSourceId: ${GROUP_SOURCE_ID} + dashboards: + rpt_quality: + groupSourceId: ${GROUP_SOURCE_ID} 
+ sourcesystem: + cdf_pi: + groupSourceId: ${GROUP_SOURCE_ID} + schedule: 0 0 29 2 * + cdf_sap_assets: + groupSourceId: ${GROUP_SOURCE_ID} + schedule: 0 0 29 2 * + cdf_sap_events: + groupSourceId: ${GROUP_SOURCE_ID} + cdf_sharepoint: + groupSourceId: ${GROUP_SOURCE_ID} diff --git a/modules/accelerators/quickstart/scripts/tests/test_wizard.py b/modules/accelerators/quickstart/scripts/tests/test_wizard.py new file mode 100644 index 00000000..dd49c58d --- /dev/null +++ b/modules/accelerators/quickstart/scripts/tests/test_wizard.py @@ -0,0 +1,533 @@ +""" +pytest test suite for qs_dp_setup_wizard.py + +Coverage: +- Email validation (valid, multiple, invalid, empty, partial) +- Cron placeholder regex (unquoted, quoted, edited, non-matching) +- set_yaml_value_by_path: found/not-found, no-op detection +- enable_file_annotation_mode: basic switch + idempotency +- Toolkit version pre-flight (below minimum, not found, unparseable, above ceiling) +- cdf.toml alpha-flag check (missing file, missing flag, valid) +- Unsupported environment name → exit 1 +- Cancel at confirmation prompt → exit 0, no files written +- KeyboardInterrupt → exit 130 +- Post-write verification: mocked build success + failure paths +""" +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +# Make the scripts/ directory importable without installing.
+sys.path.insert(0, str(Path(__file__).parent.parent)) + +from qs_dp_setup_wizard import main +from wizard._constants import _CONFIG_FLAG_VERSION, MIN_TOOLKIT_VERSION +from wizard._file_io import ensure_backup +from wizard._preflight import ( + _ensure_toml_flag, + _get_org_dir, + _parse_version, + check_cdf_toml, + check_toolkit_version, +) +from wizard._prompts import validate_emails +from wizard._sql import enable_file_annotation_mode +from wizard._verification import _cdf_env_args, run_post_write_verification +from wizard._yaml import ( + _strip_yaml_quotes, + build_yaml_paths, + set_yaml_value_by_path, +) + +# ensure_backup naming + +class TestEnsureBackup: + def test_regular_file_gets_bak_suffix(self, tmp_path: Path) -> None: + f = tmp_path / "config.dev.yaml" + f.write_text("content") + bak = ensure_backup(f) + assert ".bak." in bak.name + assert bak.suffix != ".yaml" # has extra .bak. appended + + def test_dotfile_gets_qs_backup_name(self, tmp_path: Path) -> None: + f = tmp_path / ".env" + f.write_text("SECRET=abc") + bak = ensure_backup(f) + assert bak.name.startswith("qs_backup_") + assert bak.name.endswith(".env") + assert not bak.name.startswith(".") + + def test_dotfile_backup_content_matches(self, tmp_path: Path) -> None: + f = tmp_path / ".env" + f.write_text("SECRET=abc") + bak = ensure_backup(f) + assert bak.read_text() == "SECRET=abc" + + def test_original_file_unchanged(self, tmp_path: Path) -> None: + f = tmp_path / ".env" + f.write_text("SECRET=abc") + ensure_backup(f) + assert f.read_text() == "SECRET=abc" + + +# _cdf_env_args โ€” build/deploy flag selection by toolkit version + +class TestCdfEnvArgs: + def test_old_version_uses_env_flag(self) -> None: + # Any version below 0.8.0 should use --env= + old = (0, 7, 210) + args = _cdf_env_args("dev", "config.dev.yaml", old) + assert args == ["--env=dev"] + + def test_new_version_uses_c_flag(self) -> None: + new = _CONFIG_FLAG_VERSION # exactly 0.8.0 + args = _cdf_env_args("dev", "config.dev.yaml", 
new) + assert args == ["-c", "config.dev.yaml"] + + def test_above_new_version_uses_c_flag(self) -> None: + above = (0, 9, 0) + args = _cdf_env_args("prod", "myorg/config.prod.yaml", above) + assert args == ["-c", "myorg/config.prod.yaml"] + + def test_unknown_version_defaults_to_c_flag(self) -> None: + # None means version could not be determined โ€” default to newer form + args = _cdf_env_args("dev", "config.dev.yaml", None) + assert args == ["-c", "config.dev.yaml"] + + +# Email validation + +class TestValidateEmails: + def test_single_valid(self) -> None: + ok, _ = validate_emails("user@example.com") + assert ok + + def test_multiple_valid(self) -> None: + ok, _ = validate_emails("a@b.com, c@d.org, e@f.io") + assert ok + + def test_single_invalid(self) -> None: + ok, msg = validate_emails("not-an-email") + assert not ok + assert "not-an-email" in msg + + def test_mixed_valid_invalid(self) -> None: + ok, msg = validate_emails("good@email.com, bademail") + assert not ok + assert "bademail" in msg + + def test_empty_string(self) -> None: + ok, msg = validate_emails("") + assert not ok + assert "required" in msg + + def test_missing_at(self) -> None: + ok, msg = validate_emails("userexample.com") + assert not ok + + def test_missing_domain(self) -> None: + ok, msg = validate_emails("user@") + assert not ok + + def test_whitespace_only(self) -> None: + ok, msg = validate_emails(" ") + assert not ok + + +# Cron placeholder regex + +# YAML value mutation + +class TestSetYamlValueByPath: + def test_found_and_changed(self) -> None: + lines = ["project: old-value\n"] + km = build_yaml_paths(lines) + result = set_yaml_value_by_path(lines, ("project",), "new-value", km) + assert result is not None + old_v, new_v = result + assert old_v == "old-value" + assert new_v == "new-value" + assert "new-value" in lines[0] + + def test_noop_when_value_unchanged(self) -> None: + lines = ["project: same\n"] + km = build_yaml_paths(lines) + result = set_yaml_value_by_path(lines, 
("project",), "same", km) + assert result is not None + old_v, new_v = result + assert old_v == new_v # no real change + + def test_not_found_returns_none(self) -> None: + lines = ["project: value\n"] + km = build_yaml_paths(lines) + result = set_yaml_value_by_path(lines, ("nonexistent",), "x", km) + assert result is None + + def test_nested_path(self) -> None: + lines = [ + "environment:\n", + " project: old\n", + ] + km = build_yaml_paths(lines) + result = set_yaml_value_by_path(lines, ("environment", "project"), "new", km) + assert result is not None + assert result[1] == "new" + + def test_preserves_inline_comment(self) -> None: + lines = ["project: old # keep me\n"] + km = build_yaml_paths(lines) + set_yaml_value_by_path(lines, ("project",), "new", km) + assert "# keep me" in lines[0] + + +# SQL mode switch + +class TestEnableFileAnnotationMode: + def test_switches_to_file_annotation(self, tmp_sql_path: Path) -> None: + changed = enable_file_annotation_mode(tmp_sql_path) + assert changed + content = tmp_sql_path.read_text() + # FILE_ANNOTATION block uncommented + assert "with root as (" in content + # COMMON MODE block commented + assert "-- with parentLookup as (" in content + + def test_idempotent_second_run(self, tmp_sql_path: Path) -> None: + enable_file_annotation_mode(tmp_sql_path) + content_after_first = tmp_sql_path.read_text() + changed_again = enable_file_annotation_mode(tmp_sql_path) + assert not changed_again + assert tmp_sql_path.read_text() == content_after_first + + def test_backup_created(self, tmp_sql_path: Path) -> None: + enable_file_annotation_mode(tmp_sql_path) + bak_files = list(tmp_sql_path.parent.glob("*.bak.*")) + assert len(bak_files) == 1 + + +# Toolkit version pre-flight + +class TestParseVersion: + def test_plain_semver(self) -> None: + assert _parse_version("0.7.34") == (0, 7, 34) + + def test_prefixed(self) -> None: + assert _parse_version("cdf/0.7.34") == (0, 7, 34) + + def test_with_extra_text(self) -> None: + assert 
_parse_version("Cognite Toolkit version 1.2.3 (build 42)") == (1, 2, 3) + + def test_unparseable(self) -> None: + assert _parse_version("no version here") is None + + +class TestCheckToolkitVersion: + def _mock_run(self, stdout: str, returncode: int = 0) -> MagicMock: + m = MagicMock() + m.stdout = stdout + m.stderr = "" + m.returncode = returncode + return m + + def test_exact_minimum_passes(self) -> None: + min_str = ".".join(str(x) for x in MIN_TOOLKIT_VERSION) + with patch("subprocess.run", return_value=self._mock_run(min_str)): + check_toolkit_version() # should not raise + + def test_above_minimum_passes(self) -> None: + major, minor, patch_v = MIN_TOOLKIT_VERSION + higher = f"{major}.{minor}.{patch_v + 1}" + with patch("subprocess.run", return_value=self._mock_run(higher)): + check_toolkit_version() + + def test_below_minimum_exits(self) -> None: + major, minor, patch_v = MIN_TOOLKIT_VERSION + lower = f"{major}.{minor}.{max(0, patch_v - 10)}" + with patch("subprocess.run", return_value=self._mock_run(lower)): + with pytest.raises(SystemExit) as exc: + check_toolkit_version() + assert exc.value.code == 1 + + def test_not_found_exits(self) -> None: + with patch("subprocess.run", side_effect=FileNotFoundError): + with pytest.raises(SystemExit) as exc: + check_toolkit_version() + assert exc.value.code == 1 + + def test_timeout_warns_but_continues(self, capsys: pytest.CaptureFixture[str]) -> None: + import subprocess as sp + with patch("subprocess.run", side_effect=sp.TimeoutExpired(cmd="cdf", timeout=10)): + check_toolkit_version() # must not raise + assert "timed out" in capsys.readouterr().out + + def test_unparseable_warns_but_continues(self, capsys: pytest.CaptureFixture[str]) -> None: + with patch("subprocess.run", return_value=self._mock_run("no version info")): + check_toolkit_version() + assert "Warning" in capsys.readouterr().out + + + +# _ensure_toml_flag unit tests + +class TestEnsureTomlFlag: + def _lines(self, text: str) -> list[str]: + return 
text.splitlines(keepends=True) + + def test_already_true_returns_none(self) -> None: + lines = self._lines("[alpha_flags]\ndeployment-pack = true\n") + assert _ensure_toml_flag(lines, "[alpha_flags]", "deployment-pack") is None + + def test_wrong_value_updated_in_place(self) -> None: + lines = self._lines("[alpha_flags]\ndeployment-pack = false\n") + result = _ensure_toml_flag(lines, "[alpha_flags]", "deployment-pack") + assert result is not None and "updated" in result + assert any("deployment-pack = true" in ln for ln in lines) + assert not any("deployment-pack = false" in ln for ln in lines) + + def test_missing_flag_inserted_after_section_header(self) -> None: + lines = self._lines("[alpha_flags]\n") + result = _ensure_toml_flag(lines, "[alpha_flags]", "deployment-pack") + assert result is not None and "added" in result + assert any("deployment-pack = true" in ln for ln in lines) + + def test_missing_section_appended_at_end(self) -> None: + lines = self._lines("[module]\nversion = '1'\n") + result = _ensure_toml_flag(lines, "[plugins]", "data") + assert result is not None and "added" in result + content = "".join(lines) + assert "[plugins]" in content + assert "data = true" in content + + +# cdf.toml check + +class TestCheckCdfToml: + def test_both_flags_correct_no_change(self, tmp_path: Path) -> None: + original = "[alpha_flags]\ndeployment-pack = true\n\n[plugins]\ndata = true\n" + (tmp_path / "cdf.toml").write_text(original, encoding="utf-8") + check_cdf_toml(tmp_path) + assert (tmp_path / "cdf.toml").read_text() == original # file untouched + + def test_missing_file_exits(self, tmp_path: Path) -> None: + with pytest.raises(SystemExit) as exc: + check_cdf_toml(tmp_path) + assert exc.value.code == 1 + + def test_no_sections_adds_both(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text("[module]\nversion = '1'\n", encoding="utf-8") + check_cdf_toml(tmp_path) + content = (tmp_path / "cdf.toml").read_text() + assert "[alpha_flags]" in content + 
assert "deployment-pack = true" in content + assert "[plugins]" in content + assert "data = true" in content + + def test_existing_sections_flags_appended(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text("[alpha_flags]\n\n[plugins]\n", encoding="utf-8") + check_cdf_toml(tmp_path) + content = (tmp_path / "cdf.toml").read_text() + assert "deployment-pack = true" in content + assert "data = true" in content + + def test_deployment_pack_false_updated(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + "[alpha_flags]\ndeployment-pack = false\n\n[plugins]\ndata = true\n", + encoding="utf-8", + ) + check_cdf_toml(tmp_path) + content = (tmp_path / "cdf.toml").read_text() + assert "deployment-pack = true" in content + assert "deployment-pack = false" not in content + assert content.count("deployment-pack") == 1 + + def test_data_false_updated(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + "[alpha_flags]\ndeployment-pack = true\n\n[plugins]\ndata = false\n", + encoding="utf-8", + ) + check_cdf_toml(tmp_path) + content = (tmp_path / "cdf.toml").read_text() + assert "data = true" in content + assert "data = false" not in content + assert content.count("data =") == 1 + + def test_deployment_pack_in_wrong_section_still_added_to_alpha_flags(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + "[other]\ndeployment-pack = true\n\n[plugins]\ndata = true\n", encoding="utf-8" + ) + check_cdf_toml(tmp_path) + content = (tmp_path / "cdf.toml").read_text() + assert "[alpha_flags]" in content + assert content.count("deployment-pack = true") >= 1 + + +# main() integration paths + +def _patch_preflight() -> "pytest.FixtureRequest": + """Helper: patch check_toolkit_version so tests don't need a real cdf binary.""" + return patch("qs_dp_setup_wizard.check_toolkit_version") + + +class TestMainEarlyExits: + def test_unsupported_env(self, tmp_fixture_root: Path) -> None: + with _patch_preflight(): + result = 
main(cli_env="staging_extra", repo_root_override=tmp_fixture_root) + assert result == 1 + + def test_missing_config_file(self, tmp_fixture_root: Path) -> None: + (tmp_fixture_root / "config.dev.yaml").unlink() + with _patch_preflight(): + result = main(cli_env="dev", repo_root_override=tmp_fixture_root) + assert result == 1 + + def test_missing_sql_file(self, tmp_fixture_root: Path) -> None: + with _patch_preflight(): + result = main( + cli_env="dev", + repo_root_override=tmp_fixture_root, + sql_path_override=tmp_fixture_root / "nonexistent.sql", + ) + assert result == 1 + + +class TestMainCancelPath: + """User answers 'n' at the final confirmation โ€” no files should be written.""" + + def test_cancel_writes_nothing( + self, tmp_fixture_root: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + config_before = (tmp_fixture_root / "config.dev.yaml").read_text() + + responses = iter([ + "my-cdf-project", # CDF project name + "ops@acme.com", # ApplicationOwner + "", # shared group (Y = default) + "grp-abc123", # GROUP_SOURCE_ID + "", # OPEN_ID_CLIENT_SECRET first attempt (empty loops) + "secret-xyz", # OPEN_ID_CLIENT_SECRET value + "n", # DON'T apply changes โ† cancels + ]) + monkeypatch.setattr("builtins.input", lambda _prompt: next(responses)) + + with _patch_preflight(): + result = main( + cli_env="dev", skip_verify=True, repo_root_override=tmp_fixture_root + ) + + assert result == 0 + # Config must be untouched + assert (tmp_fixture_root / "config.dev.yaml").read_text() == config_before + # No backup files + assert not list(tmp_fixture_root.glob("*.bak.*")) + + def test_keyboard_interrupt_raises_system_exit_130( + self, tmp_fixture_root: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setattr("builtins.input", lambda _: (_ for _ in ()).throw(KeyboardInterrupt())) + with _patch_preflight(): + with pytest.raises((KeyboardInterrupt, SystemExit)): + main(cli_env="dev", skip_verify=True, repo_root_override=tmp_fixture_root) + + +# Post-write 
verification + +class TestRunPostWriteVerification: + def _make_run(self, returncode: int, stdout: str = "", stderr: str = "") -> MagicMock: + m = MagicMock() + m.returncode = returncode + m.stdout = stdout + m.stderr = stderr + return m + + def test_build_success_offers_deploy( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setattr("builtins.input", lambda _: "n") # decline live deploy + with patch("subprocess.run") as mock_run: + mock_run.side_effect = [ + self._make_run(0, stdout="Build OK"), # cdf build + self._make_run(0, stdout="Dry-run OK"), # cdf deploy --dry-run + ] + run_post_write_verification(tmp_path, "dev", "config.dev.yaml") + assert mock_run.call_count == 2 + + def test_build_failure_prints_stderr( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + with patch("subprocess.run", return_value=self._make_run(1, stderr="boom")): + run_post_write_verification(tmp_path, "dev", "config.dev.yaml") + out = capsys.readouterr().out + assert "FAILED" in out + assert "cdf auth verify" in out + + def test_dry_run_failure_no_live_deploy( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + with patch("subprocess.run") as mock_run: + mock_run.side_effect = [ + self._make_run(0), # build OK + self._make_run(1), # dry-run fails + ] + run_post_write_verification(tmp_path, "dev", "config.dev.yaml") + out = capsys.readouterr().out + assert "failed" in out.lower() + assert mock_run.call_count == 2 + + +# _strip_yaml_quotes + +class TestStripYamlQuotes: + def test_no_quotes_unchanged(self) -> None: + assert _strip_yaml_quotes("my-project") == "my-project" + + def test_double_quoted(self) -> None: + assert _strip_yaml_quotes('"my-project"') == "my-project" + + def test_single_quoted(self) -> None: + assert _strip_yaml_quotes("'my-project'") == "my-project" + + def test_mismatched_quotes_unchanged(self) -> None: + assert _strip_yaml_quotes('"my-project\'') == '"my-project\'' + + def 
test_empty_string_unchanged(self) -> None: + assert _strip_yaml_quotes("") == "" + + def test_single_char_unchanged(self) -> None: + assert _strip_yaml_quotes('"') == '"' + + def test_only_quotes_stripped(self) -> None: + # value that itself contains a quote character inside + assert _strip_yaml_quotes('"hello world"') == "hello world" + + +# _get_org_dir + +class TestGetOrgDir: + def test_returns_none_when_no_cdf_toml(self, tmp_path: Path) -> None: + assert _get_org_dir(tmp_path) is None + + def test_returns_none_when_key_absent(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text("[module]\nversion = '1'\n", encoding="utf-8") + assert _get_org_dir(tmp_path) is None + + def test_double_quoted_value(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + '[module]\norganization_dir = "my_org"\n', encoding="utf-8" + ) + assert _get_org_dir(tmp_path) == "my_org" + + def test_single_quoted_value(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + "[module]\norganization_dir = 'my_org'\n", encoding="utf-8" + ) + assert _get_org_dir(tmp_path) == "my_org" + + def test_whitespace_around_equals(self, tmp_path: Path) -> None: + (tmp_path / "cdf.toml").write_text( + '[module]\norganization_dir = "spaced"\n', encoding="utf-8" + ) + assert _get_org_dir(tmp_path) == "spaced" diff --git a/modules/accelerators/quickstart/scripts/wizard/__init__.py b/modules/accelerators/quickstart/scripts/wizard/__init__.py new file mode 100644 index 00000000..e3115f80 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/wizard/__init__.py @@ -0,0 +1,15 @@ +""" +Internal helper package for the Quickstart DP setup wizard. + +Modules +------- +_constants Constants, regexes, dataclasses, YAML paths, and module registries. +_messages User-facing prompt labels and static message strings. +_file_io File I/O: backups, line reads/writes, .env parsing. +_yaml YAML path building, value mutation. 
+_prompts Terminal prompts, email validation, change-table display. +_sql SQL mode switch (COMMON โ†’ FILE_ANNOTATION). +_preflight Toolkit version check, cdf.toml validation, org_dir lookup. +_verification Post-write cdf build / deploy verification. +_style ANSI terminal styling (colours, section headers, banners). +""" diff --git a/modules/accelerators/quickstart/scripts/wizard/_constants.py b/modules/accelerators/quickstart/scripts/wizard/_constants.py new file mode 100644 index 00000000..5f07bd36 --- /dev/null +++ b/modules/accelerators/quickstart/scripts/wizard/_constants.py @@ -0,0 +1,190 @@ +""" +Constants, compiled regexes, shared dataclasses, and static module registries +for the Quickstart DP setup wizard. +""" +from __future__ import annotations + +import re +from collections.abc import Sequence +from dataclasses import dataclass + +# Version constants + +MIN_TOOLKIT_VERSION: tuple[int, int, int] = (0, 7, 210) +# Toolkit 0.8.0 changed the build/deploy flag from --env= to -c . +_CONFIG_FLAG_VERSION: tuple[int, int, int] = (0, 8, 0) + +# Environment variable names + +ENV_VAR_GROUP_SOURCE_ID = "GROUP_SOURCE_ID" +ENV_VAR_OPEN_ID_CLIENT_SECRET = "OPEN_ID_CLIENT_SECRET" + +# Valid wizard target environments + +VALID_ENVIRONMENTS: frozenset[str] = frozenset({"dev", "prod", "staging"}) + +# YAML paths for fields the wizard writes + +ENV_PROJECT_YAML_PATH: tuple[str, ...] = ("environment", "project") + +APP_OWNER_YAML_PATH: tuple[str, ...] = ( + "variables", "modules", "accelerators", "contextualization", + "cdf_file_annotation", "ApplicationOwner", +) + +# SQL mode markers and block anchors (asset.Transformation.sql) + +SQL_COMMON_MODE_MARKER = "[COMMON MODE]" +SQL_FILE_ANNOTATION_MODE_MARKER = "[FILE_ANNOTATION MODE]" +SQL_COMMON_BLOCK_ANCHOR = "with parentLookup as (" +SQL_FILE_ANNOTATION_BLOCK_ANCHOR = "with root as (" + +# Synthetic data upload directories + +DATA_UPLOAD_DIRS: tuple[str, ...] 
= ( + "modules/sourcesystem/cdf_pi/upload_data", + "modules/sourcesystem/cdf_sap_assets/upload_data", + "modules/sourcesystem/cdf_sap_events/upload_data", + "modules/sourcesystem/cdf_sharepoint/upload_data", + "modules/accelerators/contextualization/cdf_entity_matching/upload_data", + "modules/accelerators/contextualization/cdf_file_annotation/upload_data", +) + +# Compiled regexes + +_EMAIL_RE = re.compile(r"^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$") +_YAML_LINE_RE = re.compile(r"^(\s*[A-Za-z0-9_]+:\s*)([^#\n]*)(\s*(?:#.*)?)$") + +# Data types + +@dataclass(frozen=True) +class GroupTarget: + label: str + path: tuple[str, ...] + default_env_var: str + module: str # human-readable module path (for per-module prompts) + description: str # what this group controls (for per-module prompts) + + +@dataclass +class ChangeRecord: + label: str + old_val: str # "" when the field was empty before + new_val: str + + @property + def changed(self) -> bool: + return self.old_val != self.new_val + + +# Module registry + +GROUP_TARGETS: Sequence[GroupTarget] = ( + GroupTarget( + label="cdf_ingestion.groupSourceId", + path=("variables", "modules", "accelerators", "cdf_ingestion", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_INGESTION", + module="accelerators/cdf_ingestion", + description="Data ingestion pipeline โ€” controls write access for ingestion jobs", + ), + GroupTarget( + label="cdf_entity_matching.entity_matching_processing_group_source_id", + path=( + "variables", "modules", "accelerators", "contextualization", + "cdf_entity_matching", "entity_matching_processing_group_source_id", + ), + default_env_var="GROUP_SOURCE_ID_ENTITY_MATCHING", + module="accelerators/contextualization/cdf_entity_matching", + description="Entity matching โ€” controls access for the matching processing service", + ), + GroupTarget( + label="cdf_file_annotation.groupSourceId", + path=( + "variables", "modules", "accelerators", "contextualization", + "cdf_file_annotation", 
"groupSourceId", + ), + default_env_var="GROUP_SOURCE_ID_FILE_ANNOTATION", + module="accelerators/contextualization/cdf_file_annotation", + description="File annotation โ€” controls access for the annotation processing service", + ), + GroupTarget( + label="open_industrial_data_sync.groupSourceId", + path=("variables", "modules", "accelerators", "open_industrial_data_sync", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_OID_SYNC", + module="accelerators/open_industrial_data_sync", + description="Open Industrial Data sync โ€” controls access for OID data synchronisation", + ), + GroupTarget( + label="rpt_quality.groupSourceId", + path=("variables", "modules", "dashboards", "rpt_quality", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_QUALITY", + module="dashboards/rpt_quality", + description="Quality reporting dashboard โ€” controls read access for quality metrics", + ), + GroupTarget( + label="cdf_pi.groupSourceId", + path=("variables", "modules", "sourcesystem", "cdf_pi", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_PI", + module="sourcesystem/cdf_pi", + description="PI system connector โ€” controls access for the OSIsoft PI data source", + ), + GroupTarget( + label="cdf_sap_assets.groupSourceId", + path=("variables", "modules", "sourcesystem", "cdf_sap_assets", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_SAP_ASSETS", + module="sourcesystem/cdf_sap_assets", + description="SAP assets connector โ€” controls access for SAP asset hierarchy ingestion", + ), + GroupTarget( + label="cdf_sap_events.groupSourceId", + path=("variables", "modules", "sourcesystem", "cdf_sap_events", "groupSourceId"), + default_env_var="GROUP_SOURCE_ID_SAP_EVENTS", + module="sourcesystem/cdf_sap_events", + description="SAP events connector โ€” controls access for SAP maintenance event ingestion", + ), + GroupTarget( + label="cdf_sharepoint.groupSourceId", + path=("variables", "modules", "sourcesystem", "cdf_sharepoint", "groupSourceId"), + 
def ensure_backup(path: Path) -> Path:
    """Create a timestamped backup of *path* on every call and return the backup path.

    For dotfiles such as ``.env`` (no conventional extension) the backup is
    written as ``qs_backup_<timestamp>.env`` in the same directory, so it is
    not mistaken for an active secrets file.

    All other files receive the standard ``<name>.bak.<timestamp>`` suffix.

    The timestamp has one-second resolution. If a backup with the computed
    name already exists (two calls within the same second), ``-1``, ``-2``, ...
    is appended to the timestamp so that an earlier backup is never overwritten.
    """
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    bare_dotfile = path.name.startswith(".") and path.suffix == ""

    def _candidate(tag: str) -> Path:
        if bare_dotfile:
            # Dotfile with no extension, e.g. ".env" -> qs_backup_<tag>.env
            stem = path.name.lstrip(".")  # "env"
            return path.parent / f"qs_backup_{tag}.{stem}"
        return path.with_suffix(path.suffix + f".bak.{tag}")

    backup_path = _candidate(ts)
    attempt = 1
    while backup_path.exists():
        backup_path = _candidate(f"{ts}-{attempt}")
        attempt += 1
    shutil.copy2(path, backup_path)
    return backup_path


def read_lines(path: Path) -> list[str]:
    """Read *path* as UTF-8 and return its lines with line endings kept."""
    return path.read_text(encoding="utf-8").splitlines(keepends=True)


def write_lines(path: Path, lines: Sequence[str]) -> None:
    """Concatenate *lines* (which carry their own line endings) into *path* as UTF-8."""
    path.write_text("".join(lines), encoding="utf-8")


def parse_env_file(path: Path) -> tuple[list[str], dict[str, str], dict[str, int]]:
    """Parse a .env file into ``(lines, values_dict, key_to_line_index)``.

    Blank lines, ``#`` comments, lines without ``=`` and lines with an empty
    key are skipped. Keys are stripped of surrounding whitespace; values keep
    everything after the first ``=`` except the trailing newline. When a key
    occurs more than once the last occurrence wins.

    Returns empty structures when the file does not exist.
    """
    if not path.exists():
        return [], {}, {}
    lines = read_lines(path)
    values: dict[str, str] = {}
    key_to_line: dict[str, int] = {}
    for idx, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if not key:
            continue
        values[key] = value.rstrip("\n")
        key_to_line[key] = idx
    return lines, values, key_to_line


def upsert_env_var(
    lines: list[str],
    key_to_line: dict[str, int],
    key: str,
    value: str,
) -> None:
    """Update an existing key or append a new ``KEY=value`` line.

    Both *lines* and *key_to_line* are modified in place.
    """
    new_line = f"{key}={value}\n"
    if key in key_to_line:
        lines[key_to_line[key]] = new_line
        return
    # Terminate a final line that lacks a newline so the new entry starts on its own line.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] = lines[-1] + "\n"
    lines.append(new_line)
    key_to_line[key] = len(lines) - 1
# User-facing prompt labels and static message strings for the Quickstart DP
# setup wizard. Keeping all terminal text in one place makes the wizard's
# "script" easy to review and reword without touching logic code.
#
# Punctuation in these strings (em dashes) is real Unicode; the previous text
# contained the mis-decoded byte sequences instead.

# Section / banner titles

SEC_CDF_PROJECT = "CDF Project"
SEC_APP_OWNER = "File Annotation: Streamlit Application Owner"
SEC_GROUP_SOURCE_IDS = "Group Source IDs"
SEC_OPENID_SECRET = "OpenID Client Secret"
SEC_POST_VERIFY = "Post-write verification"
BANNER_PENDING = "PENDING CHANGES — review before applying"

# Prompt labels

PROMPT_ENV = "Target environment"
PROMPT_PROJECT = "CDF project name"
PROMPT_APP_OWNER = "Streamlit Application Owner email(s)"
PROMPT_SHARED_GROUP = "Use the same GROUP_SOURCE_ID for all modules?"
PROMPT_APPLY = "Apply all changes above?"

# Environment selection

ENV_SELECT_INTRO = (
    "\nThis wizard configures exactly one environment at a time.\n"
    "Valid choices: dev, prod, staging\n"
    " (default: dev — press Enter to accept)"
)

# Group Source IDs section

# Use str.format(n=..., module_names=...) at call site.
GROUP_SOURCE_INTRO = (
    " {n} modules in Quickstart Deployment Pack need a groupSourceId — the external ID\n"
    " of the IdP group (e.g. Azure AD) that controls CDF access for that module.\n"
    " Modules: {module_names}\n"
)

GROUP_SOURCE_PER_MODULE_HINT = (
    "\n Prompting for each module (already-set values shown with keep/replace option).\n"
)

# Hints shown in main()

HINT_APP_OWNER_FORMAT = " Accepts one or more comma-separated email addresses."
# Use str.format(value=...) at call site.
HINT_CURRENT_VALUE = " Current value : {value}"
HINT_SQL_PENDING = " FILE_ANNOTATION mode will be enabled (COMMON MODE block commented out)."
HINT_BACKUPS = " Backups : timestamped .bak files created for each modified file."

# Status / warning messages shown in main()

WARN_ABORTED = "Aborted — no files were modified."
MSG_DONE = "\nDone."

# Post-write verification step labels

VERIFY_BUILD_START = " [1/4] Verifying build ..."
VERIFY_BUILD_OK = " [1/4] Build succeeded."
VERIFY_BUILD_FAIL = " [1/4] Build FAILED."
VERIFY_DRY_START = "\n [2/4] Running dry-run deploy ..."
VERIFY_DRY_OK = " [2/4] Dry-run deploy succeeded."
VERIFY_DRY_FAIL = " [2/4] Dry-run deploy failed — not proceeding to live deploy."
VERIFY_LIVE_OK = " [3/4] Live deploy complete."
VERIFY_LIVE_SKIP = " [3/4] Live deploy skipped."
VERIFY_DATA_INTRO = (
    "\n This Deployment Pack is bundled with synthetic data to test the file annotation pipeline.\n"
    " Uploading it will populate your CDF project with sample assets, events, and documents."
)
VERIFY_DATA_UPLOAD = "\n [4/4] Uploading synthetic test data ..."
VERIFY_DATA_OK = " [4/4] Data upload complete."
VERIFY_DATA_SKIP = " [4/4] Data upload skipped."
VERIFY_DATA_FAIL = " [4/4] Data upload failed for one or more directories (see output above)."
import _style as style + + +# Version helpers + +def _parse_version(version_str: str) -> tuple[int, int, int] | None: + """Extract the first semver triplet from a string, e.g. ``'cdf 0.7.34'`` โ†’ ``(0, 7, 34)``.""" + m = re.search(r"(\d+)\.(\d+)\.(\d+)", version_str) + if not m: + return None + return int(m.group(1)), int(m.group(2)), int(m.group(3)) + + +def _version_str(v: tuple[int, int, int]) -> str: + return ".".join(str(x) for x in v) + + +# Toolkit version check + +def check_toolkit_version() -> tuple[int, int, int] | None: + """Shell out to ``cdf --version`` and enforce the minimum supported version. + + Exits with code 1 if the installed version is below the minimum. + Returns the parsed version tuple, or ``None`` if it could not be determined + (timeout or unparseable output โ€” execution continues with a warning). + """ + try: + result = subprocess.run( + ["cdf", "--version"], capture_output=True, text=True, timeout=10 + ) + raw = (result.stdout + result.stderr).strip() + except FileNotFoundError: + style.error( + "Error: 'cdf' command not found.\n" + f" Install Cognite Toolkit โ‰ฅ {_version_str(MIN_TOOLKIT_VERSION)} and retry.\n" + " See: https://developer.cognite.com/sdks/toolkit/" + ) + sys.exit(1) + except subprocess.TimeoutExpired: + style.warning("Warning: 'cdf --version' timed out โ€” skipping version check.") + return None + + parsed = _parse_version(raw) + if parsed is None: + style.warning( + f"Warning: could not parse Toolkit version from: {raw!r} โ€” skipping version check." 
+ ) + return None + + min_s = _version_str(MIN_TOOLKIT_VERSION) + cur_s = _version_str(parsed) + + if parsed < MIN_TOOLKIT_VERSION: + style.error( + f"Error: Toolkit {cur_s} is below the minimum required version {min_s}.\n" + f" Upgrade with: pip install --upgrade cognite-toolkit>={min_s}\n" + " See: https://developer.cognite.com/sdks/toolkit/" + ) + sys.exit(1) + + style.success(f"Toolkit version {cur_s} โ€” OK (minimum: {min_s}).") + return parsed + + +# cdf.toml check + +def _ensure_toml_flag(lines: list[str], section_header: str, flag: str) -> str | None: + """Ensure ``flag = true`` exists in *section_header* of a TOML file (in-place on *lines*). + + - Already ``= true`` โ†’ no-op, returns ``None``. + - Exists with a different value โ†’ updated in place, returns change description. + - Missing from section โ†’ inserted after section header, returns change description. + - Section itself missing โ†’ section + flag appended at end of file, returns change description. + """ + section_idx: int | None = None + flag_idx: int | None = None + in_section = False + + for idx, line in enumerate(lines): + stripped = line.strip() + if stripped == section_header: + in_section = True + section_idx = idx + elif stripped.startswith("[") and stripped != section_header: + in_section = False + if in_section and re.match(rf"{re.escape(flag)}\s*=", stripped): + flag_idx = idx + break + + if flag_idx is not None: + if re.match(rf"{re.escape(flag)}\s*=\s*true", lines[flag_idx].strip()): + return None # already correct + lines[flag_idx] = f"{flag} = true\n" + return f"updated {flag} = true (in {section_header})" + + if section_idx is not None: + lines.insert(section_idx + 1, f"{flag} = true\n") + else: + if lines and not lines[-1].endswith("\n"): + lines[-1] += "\n" + lines.append(f"\n{section_header}\n{flag} = true\n") + return f"added {flag} = true (in {section_header})" + + +def check_cdf_toml(repo_root: Path) -> None: + """Verify ``cdf.toml`` exists and contains the required flags. 
+ + Ensures ``deployment-pack = true`` under ``[alpha_flags]`` and + ``data = true`` under ``[plugins]``. Each flag is added if absent or + corrected if present with a non-true value. Exits with code 1 only + if the file is absent. + """ + toml_path = repo_root / "cdf.toml" + if not toml_path.exists(): + style.error( + f"Error: cdf.toml not found at {repo_root}\n" + " Ensure you are running this wizard from inside a valid Cognite Toolkit project." + ) + sys.exit(1) + + lines = toml_path.read_text(encoding="utf-8").splitlines(keepends=True) + + changes: list[str] = [] + for result in [ + _ensure_toml_flag(lines, "[alpha_flags]", "deployment-pack"), + _ensure_toml_flag(lines, "[plugins]", "data"), + ]: + if result: + changes.append(result) + + if changes: + toml_path.write_text("".join(lines), encoding="utf-8") + style.warning( + " Updated cdf.toml:\n" + + "".join(f" {c}\n" for c in changes) + ) + + +# cdf.toml helpers + +def _get_org_dir(repo_root: Path) -> str | None: + """Return the ``organization_dir`` value from ``cdf.toml``, or ``None``. + + Looks for ``organization_dir = "some_dir"`` (any section, any quote style). + """ + toml_path = repo_root / "cdf.toml" + if not toml_path.exists(): + return None + content = toml_path.read_text(encoding="utf-8") + m = re.search(r"""organization_dir\s*=\s*["']([^"']+)["']""", content) + return m.group(1) if m else None + + +# .gitignore safety check + +def _check_gitignore(repo_root: Path) -> None: + """Warn if ``.env`` is not listed in ``.gitignore``.""" + gitignore = repo_root / ".gitignore" + if gitignore.exists(): + content = gitignore.read_text(encoding="utf-8") + if not any(line.strip() in {".env", "*.env"} for line in content.splitlines()): + style.warning( + "Warning: .env does not appear to be in .gitignore " + "โ€” secrets may be committed accidentally." 
# Prompt helpers

def prompt_text(message: str, default: str | None = None, allow_empty: bool = False) -> str:
    """Ask until a usable string is entered.

    Empty input yields *default* when one is given, otherwise ``""`` when
    *allow_empty* is set; in every other case the question is repeated.
    """
    prompt = f"{message}: " if default is None else f"{message} [{default}]: "
    while True:
        entered = input(prompt).strip()
        if entered:
            return entered
        if default is not None:
            return default
        if allow_empty:
            return entered
        style.error(" Error: input cannot be empty.")


def prompt_yes_no(message: str, default: bool = False) -> bool:
    """Ask a yes/no question; empty input selects *default*."""
    recognised = {"y": True, "yes": True, "n": False, "no": False}
    choice_hint = "y/N"
    if default:
        choice_hint = "Y/n"
    while True:
        reply = input(f"{message} ({choice_hint}): ").strip().lower()
        if reply == "":
            return default
        if reply in recognised:
            return recognised[reply]
        style.error(" Error: please enter y/yes or n/no.")


def validate_emails(raw: str) -> tuple[bool, str]:
    """Check a comma-separated list of e-mail addresses.

    Gives ``(True, "")`` when every non-blank entry matches the address
    pattern, else ``(False, reason)``.
    """
    candidates = []
    for piece in raw.split(","):
        piece = piece.strip()
        if piece:
            candidates.append(piece)
    if len(candidates) == 0:
        return False, "at least one email address is required."
    rejected = [addr for addr in candidates if _EMAIL_RE.fullmatch(addr) is None]
    if rejected:
        return False, f"invalid email address(es): {', '.join(rejected)}"
    return True, ""


def prompt_email(message: str, default: str | None = None) -> str:
    """Ask for one or more comma-separated e-mail addresses until they validate.

    Empty input returns *default* (unvalidated) when one is supplied.
    """
    prompt = f"{message}: " if default is None else f"{message} [{default}]: "
    while True:
        entered = input(prompt).strip()
        if entered == "" and default is not None:
            return default
        valid, reason = validate_emails(entered)
        if not valid:
            style.error(f" Error: {reason}")
            continue
        return entered


def mask_secret(value: str) -> str:
    """Hide a secret: keep two characters at each end, or nothing for short values."""
    return "****" if len(value) <= 6 else value[:2] + "****" + value[-2:]
def enable_file_annotation_mode(sql_path: Path, skip_backup: bool = False) -> bool:
    """Flip asset.Transformation.sql from COMMON mode to FILE_ANNOTATION mode.

    Every active line of the COMMON block (from its anchor up to the
    FILE_ANNOTATION marker) is prefixed with ``-- ``; if the FILE_ANNOTATION
    block's anchor line is commented, the leading ``--`` is removed from every
    line from that anchor to the end of the file.

    Returns ``True`` when the file was rewritten and ``False`` when nothing
    needed changing, so running it again is harmless.

    A backup is taken before writing unless ``skip_backup`` is ``True`` (for
    callers that have already backed the file up).

    Raises ``RuntimeError`` when the mode markers or either SQL block anchor
    cannot be located; the file is left untouched in that case.
    """
    sql_lines = read_lines(sql_path)
    total = len(sql_lines)
    dirty = False

    def _first_line_with(needle: str, lo: int, hi: int) -> int | None:
        # Index of the first line in [lo, hi) containing *needle*, else None.
        for pos in range(lo, hi):
            if needle in sql_lines[pos]:
                return pos
        return None

    common_marker = _first_line_with(SQL_COMMON_MODE_MARKER, 0, total)
    annotation_marker = _first_line_with(SQL_FILE_ANNOTATION_MODE_MARKER, 0, total)
    markers_in_order = (
        common_marker is not None
        and annotation_marker is not None
        and common_marker < annotation_marker
    )
    if not markers_in_order:
        raise RuntimeError("Could not find expected mode markers in asset.Transformation.sql")

    common_start = _first_line_with(SQL_COMMON_BLOCK_ANCHOR, common_marker, annotation_marker)
    if common_start is None:
        raise RuntimeError("Could not find COMMON MODE SQL block in asset.Transformation.sql")

    # Disable the COMMON block: blank lines and existing comments stay as they are.
    for pos in range(common_start, annotation_marker):
        raw = sql_lines[pos]
        content = raw.strip()
        if content and not content.startswith("--"):
            body = raw.lstrip(" ")
            sql_lines[pos] = " " * (len(raw) - len(body)) + "-- " + body
            dirty = True

    annotation_start = _first_line_with(SQL_FILE_ANNOTATION_BLOCK_ANCHOR, annotation_marker, total)
    if annotation_start is None:
        raise RuntimeError(
            "Could not find FILE_ANNOTATION MODE SQL block in asset.Transformation.sql"
        )

    # Enable the FILE_ANNOTATION block, but only when its anchor line is commented.
    if sql_lines[annotation_start].lstrip().startswith("--"):
        for pos in range(annotation_start, total):
            raw = sql_lines[pos]
            body = raw.lstrip()
            if body.startswith("-- "):
                cut = 3
            elif body.startswith("--"):
                cut = 2
            else:
                continue
            sql_lines[pos] = " " * (len(raw) - len(body)) + body[cut:]
            dirty = True

    if dirty:
        if not skip_backup:
            ensure_backup(sql_path)
        write_lines(sql_path, sql_lines)
    return dirty
def _supports_color() -> bool:
    """Decide whether ANSI colour codes should be emitted.

    Colour is off when stdout is not a TTY, when ``NO_COLOR`` is set to a
    non-empty value, or when ``TERM`` is ``dumb``.
    """
    if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty():
        return False
    if os.environ.get("NO_COLOR"):
        return False
    if os.environ.get("TERM", "").lower() == "dumb":
        return False
    return True


# Evaluated once, at import time.
_C = _supports_color()

# ANSI codes (empty strings when colour is disabled)
RESET = "\033[0m" if _C else ""
BOLD = "\033[1m" if _C else ""
DIM = "\033[2m" if _C else ""
RED = "\033[91m" if _C else ""
GREEN = "\033[92m" if _C else ""
YELLOW = "\033[93m" if _C else ""
CYAN = "\033[96m" if _C else ""


def _width() -> int:
    """Return current terminal width, defaulting to 80."""
    return shutil.get_terminal_size((80, 24)).columns


# Structural elements

def section(title: str) -> None:
    """Print a section divider that fills the terminal width.

    The output is a blank line followed by ``── <title> ─────…`` (bold cyan
    when colour is enabled). The fill stops two columns short of the terminal
    width and is empty when the title alone already reaches it.
    """
    prefix = "── "
    suffix = " "
    fill_len = max(0, _width() - len(prefix) - len(title) - len(suffix) - 2)
    fill = "─" * fill_len
    print(f"\n{BOLD}{CYAN}{prefix}{title}{suffix}{fill}{RESET}")


def banner(title: str) -> None:
    """Print a bold box around *title*.

    The box is at most 68 columns wide and two columns narrower than the
    terminal; a title that does not fit is truncated. Example::

        ╔══════════════════════════════════════════╗
        ║ PENDING CHANGES — review before applying ║
        ╚══════════════════════════════════════════╝
    """
    # Never let the box shrink below its own frame (2 corners + 2 spaces);
    # a negative width would slice the title from the wrong end.
    box_w = max(4, min(_width() - 2, 68))  # total width including corner chars
    inner = box_w - 4  # space available for text
    top = "╔" + "═" * (box_w - 2) + "╗"
    middle = "║ " + title[:inner].ljust(inner) + " ║"
    bottom = "╚" + "═" * (box_w - 2) + "╝"
    print(f"\n{BOLD}{top}")
    print(middle)
    print(f"{bottom}{RESET}")


# Message-level helpers

def error(message: str) -> None:
    """Print an error: first line red and bold, any further lines red.

    An empty message prints nothing.
    """
    lines = message.splitlines()
    if not lines:
        return
    print(f"{RED}{BOLD}{lines[0]}{RESET}")
    for line in lines[1:]:
        print(f"{RED}{line}{RESET}")


def warning(message: str) -> None:
    """Print a yellow warning, one coloured line per line of *message*.

    An empty message prints nothing.
    """
    lines = message.splitlines()
    if not lines:
        return
    print(f"{YELLOW}{lines[0]}{RESET}")
    for line in lines[1:]:
        print(f"{YELLOW}{line}{RESET}")


def success(message: str) -> None:
    """Print a green bold success message."""
    print(f"{GREEN}{BOLD}{message}{RESET}")


def hint(message: str) -> None:
    """Print a dimmed informational / hint line."""
    print(f"{DIM}{message}{RESET}")
# ---------------------------------------------------------------------------
# wizard/_verification.py -- post-write verification helpers
# ---------------------------------------------------------------------------

# Substrings that mark a line of build output as a likely actionable error.
_BUILD_ERROR_RE = re.compile(
    r"error|Error|ERROR|failed|Failed|FAILED|invalid|Invalid|not found"
)


# Flag selection

def _cdf_env_args(
    env: str,
    config_arg: str,
    toolkit_version: tuple[int, int, int] | None,
) -> list[str]:
    """Return the env/config flag(s) for ``cdf build`` / ``cdf deploy`` commands.

    Args:
        env: Environment name, used for the legacy ``--env=<env>`` form.
        config_arg: Value passed after ``-c`` in the newer form.
        toolkit_version: Installed toolkit version, or ``None`` if unknown.

    Returns:
        ``["--env=<env>"]`` when *toolkit_version* is known and lower than
        ``_CONFIG_FLAG_VERSION``; otherwise ``["-c", <config_arg>]`` (this is
        also the default when the version is unknown).
    """
    if toolkit_version is not None and toolkit_version < _CONFIG_FLAG_VERSION:
        return [f"--env={env}"]
    return ["-c", config_arg]


# Failure summary

def _summarise_build_failure(stderr: str, build_cmd: str) -> None:
    """Print a consolidated build-failure summary from *stderr*.

    Shows up to ten lines that match ``_BUILD_ERROR_RE``.  When no line
    matches, falls back to the last five non-empty lines.  Always ends with
    a fixed list of suggested next steps.

    Args:
        stderr: Captured build output to scan (the caller in this module
            passes stderr and stdout concatenated).
        build_cmd: The build command line, echoed in the suggestions.
    """
    output_lines = stderr.splitlines()
    # A line that matches the pattern is necessarily non-blank.
    error_lines = [ln for ln in output_lines if _BUILD_ERROR_RE.search(ln)]
    if error_lines:
        style.error("\n Errors detected:")
        for ln in error_lines[:10]:
            style.error(f" {ln.strip()}")
        if len(error_lines) > 10:
            style.hint(
                f" … and {len(error_lines) - 10} more — run with --verbose for full output."
            )
    else:
        tail = [ln for ln in output_lines if ln.strip()][-5:]
        if tail:
            style.hint("\n Build output (last lines):")
            for ln in tail:
                style.hint(f" {ln.strip()}")

    print(
        f"\n {style.BOLD}Suggested actions:{style.RESET}\n"
        f" 1. Inspect full output : {build_cmd} --verbose\n"
        f" 2. Verify credentials : cdf auth verify\n"
        f" 3. Check alpha flags : [alpha_flags] deployment-pack = true in cdf.toml\n"
        f" 4. Restore if needed : copy the timestamped .bak file over the config"
    )


# Post-write verification

def run_post_write_verification(
    repo_root: Path,
    env: str,
    config_arg: str,
    toolkit_version: tuple[int, int, int] | None = None,
    project_name: str | None = None,
    org_dir: str | None = None,
) -> None:
    """Run ``cdf build`` then offer ``cdf deploy --dry-run`` and live deploy.

    Steps, each of which stops the sequence when it fails or is declined:

    1. ``cdf build`` with the flag chosen by ``_cdf_env_args``.
    2. ``cdf deploy --dry-run`` (stdout is echoed; stderr is echoed on failure).
    3. Optional live ``cdf deploy`` after a yes/no prompt.
    4. Optional ``cdf data upload dir <dir>`` for every entry of
       ``DATA_UPLOAD_DIRS`` after a second yes/no prompt.

    Args:
        repo_root: Working directory for every ``cdf`` invocation.
        env: Environment name (legacy flag form; also the deploy-target label
            when *project_name* is not given).
        config_arg: Config argument for the ``-c`` flag form.
        toolkit_version: Installed toolkit version, or ``None`` if unknown.
        project_name: Label shown in the live-deploy prompt, if given.
        org_dir: Optional prefix joined with ``/`` in front of each upload dir.
    """
    env_args = _cdf_env_args(env, config_arg, toolkit_version)
    build_cmd = "cdf build " + " ".join(env_args)
    cwd = str(repo_root)

    style.section(SEC_POST_VERIFY)

    # --- Step 1: cdf build --------------------------------------------------
    style.hint(VERIFY_BUILD_START)
    style.hint(f" Running: {build_cmd}")
    build = subprocess.run(
        ["cdf", "build"] + env_args,
        capture_output=True, text=True, cwd=cwd,
    )
    if build.returncode != 0:
        style.error(VERIFY_BUILD_FAIL)
        _summarise_build_failure(build.stderr + build.stdout, build_cmd)
        return
    style.success(VERIFY_BUILD_OK)

    # --- Step 2: cdf deploy --dry-run ---------------------------------------
    style.hint(VERIFY_DRY_START)
    style.hint(" Running: cdf deploy --dry-run")
    dry = subprocess.run(
        ["cdf", "deploy", "--dry-run"],
        capture_output=True, text=True, cwd=cwd,
    )
    if dry.stdout:
        for line in dry.stdout.splitlines():
            print(f" {line}")
    if dry.returncode != 0:
        # stderr is captured above; surface it so the failure is diagnosable.
        if dry.stderr:
            for line in dry.stderr.splitlines():
                print(f" {line}")
        style.warning(VERIFY_DRY_FAIL)
        return
    style.success(VERIFY_DRY_OK)

    # --- Step 3: live deploy (optional) -------------------------------------
    deploy_target = project_name or env
    style.hint(f"\n [3/4] Live deploy to '{deploy_target}' (optional).")
    if not prompt_yes_no(f" Proceed with live deploy to '{deploy_target}'?", default=False):
        style.hint(VERIFY_LIVE_SKIP)
        return
    style.hint(" Running: cdf deploy")
    live = subprocess.run(["cdf", "deploy"], cwd=cwd)
    if live.returncode != 0:
        # Do not report success or upload data on top of a failed deploy.
        style.error(
            f" Live deploy to '{deploy_target}' failed "
            f"(exit code {live.returncode}); skipping data upload."
        )
        return
    style.success(VERIFY_LIVE_OK)

    # --- Step 4: synthetic data upload (optional) ---------------------------
    style.hint(VERIFY_DATA_INTRO)
    if not prompt_yes_no("\n Upload synthetic test data now?", default=False):
        style.hint(VERIFY_DATA_SKIP)
        return
    style.hint(VERIFY_DATA_UPLOAD)
    failed = False
    for upload_dir in DATA_UPLOAD_DIRS:
        full_dir = f"{org_dir}/{upload_dir}" if org_dir else upload_dir
        style.hint(f" Running: cdf data upload dir {full_dir}")
        result = subprocess.run(
            ["cdf", "data", "upload", "dir", full_dir],
            cwd=cwd,
        )
        # Keep going after a failure so every directory is attempted.
        if result.returncode != 0:
            failed = True
    if failed:
        style.warning(VERIFY_DATA_FAIL)
    else:
        style.success(VERIFY_DATA_OK)


# ---------------------------------------------------------------------------
# wizard/_yaml.py -- line-based YAML helpers (no YAML library involved)
# ---------------------------------------------------------------------------

# Path building

def yaml_key_match(line: str) -> tuple[int, str] | None:
    """Return ``(indent_width, key)`` for a YAML key line, or ``None``.

    A key line is optional leading whitespace, a key made of ASCII letters,
    digits and underscores, then a colon.  Anything else (list items,
    comments, keys with other characters) yields ``None``.
    """
    m = re.match(r"^(\s*)([A-Za-z0-9_]+):", line)
    if not m:
        return None
    return len(m.group(1)), m.group(2)


def build_yaml_paths(lines: Sequence[str]) -> dict[tuple[str, ...], int]:
    """Map each key path (a tuple of nested keys) to its line index.

    Nesting is inferred purely from indentation: a key is a child of the
    nearest preceding key with a strictly smaller indent.  When the same
    path occurs more than once, the last occurrence wins.
    """
    key_line_map: dict[tuple[str, ...], int] = {}
    stack: list[tuple[int, str]] = []  # (indent, key) of the open ancestors
    for idx, line in enumerate(lines):
        parsed = yaml_key_match(line)
        if not parsed:
            continue
        indent, key = parsed
        # Close every ancestor that is not shallower than the current key.
        while stack and indent <= stack[-1][0]:
            stack.pop()
        current_path = tuple(k for _, k in stack) + (key,)
        key_line_map[current_path] = idx
        stack.append((indent, key))
    return key_line_map


# Value read / write

def get_yaml_current_value(
    lines: Sequence[str],
    path: tuple[str, ...],
    key_line_map: dict[tuple[str, ...], int],
) -> str | None:
    """Return the raw current value for a YAML path, or ``None`` if absent/empty.

    ``None`` is returned when *path* is not in *key_line_map*, when the line
    does not match ``_YAML_LINE_RE``, or when the captured value is blank.
    """
    idx = key_line_map.get(path)
    if idx is None:
        return None
    m = _YAML_LINE_RE.match(lines[idx].rstrip("\n"))
    if not m:
        return None
    val = m.group(2).strip()
    return val if val else None


def set_yaml_line_value(line: str, value: str) -> str:
    """Replace the value on a single YAML line, preserving any trailing comment.

    Returns the rebuilt line, always terminated with a newline.

    Raises:
        ValueError: if *line* does not match ``_YAML_LINE_RE``.
    """
    m = _YAML_LINE_RE.match(line.rstrip("\n"))
    if not m:
        raise ValueError(f"Cannot set value for YAML line: {line!r}")
    comment_part = m.group(3) or ""
    if comment_part.strip().startswith("#"):
        # Normalise the gap between the new value and the comment.
        comment_part = f" {comment_part.strip()}"
    return f"{m.group(1)}{value}{comment_part}\n"


def _extract_yaml_value(line: str) -> str:
    """Return the value part of a ``key: value`` line, or the stripped line."""
    m = _YAML_LINE_RE.match(line.rstrip("\n"))
    return m.group(2).strip() if m else line.strip()


def _strip_yaml_quotes(value: str) -> str:
    """Remove one layer of matching surrounding quotes (double or single)."""
    for q in ('"', "'"):
        if len(value) >= 2 and value.startswith(q) and value.endswith(q):
            return value[1:-1]
    return value


def quote_yaml_string(value: str) -> str:
    """Wrap a plain string in double quotes as a single-line YAML scalar.

    Backslashes and double quotes are escaped.  Newline, carriage return and
    tab are written as ``\\n`` / ``\\r`` / ``\\t`` escapes so the result never
    spans more than one line, which the line-based editing in this module
    depends on.
    """
    escaped = (
        value.replace("\\", "\\\\")  # must run first: later escapes add backslashes
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )
    return f'"{escaped}"'


def set_yaml_value_by_path(
    lines: list[str],
    path: tuple[str, ...],
    value: str,
    key_line_map: dict[tuple[str, ...], int] | None = None,
) -> tuple[str, str] | None:
    """Set a YAML value by key path, mutating *lines* in place.

    Args:
        lines: File content, one entry per line.
        path: Tuple of nested keys identifying the field.
        value: Replacement value text, written verbatim.
        key_line_map: Optional precomputed result of ``build_yaml_paths``;
            built from *lines* when omitted.

    Returns:
        ``(old_value, new_value)`` if the path was found, ``None`` otherwise.
        Either element is ``""`` when the field was empty.
    """
    if key_line_map is None:
        key_line_map = build_yaml_paths(lines)
    idx = key_line_map.get(path)
    if idx is None:
        return None
    old_line = lines[idx]
    new_line = set_yaml_line_value(old_line, value)
    lines[idx] = new_line
    return _extract_yaml_value(old_line), _extract_yaml_value(new_line)


def set_target_view_filter_values(
    lines: list[str],
    desired_value: str,
    key_line_map: dict[tuple[str, ...], int] | None = None,
) -> tuple[str, str] | None:
    """Update the first list item under ``targetViewFilterValues``.

    Only the line immediately after the key is considered; it must be a
    ``- item`` line.  The line is rewritten as ``<indent>- <desired_value>``,
    so anything else that was on it is replaced.

    Returns:
        ``(old_value, new_value)`` if found, ``None`` when the key is missing,
        is the last line, or is not directly followed by a list item.
    """
    if key_line_map is None:
        key_line_map = build_yaml_paths(lines)
    base_path = (
        "variables", "modules", "accelerators", "contextualization",
        "cdf_entity_matching", "targetViewFilterValues",
    )
    idx = key_line_map.get(base_path)
    if idx is None or idx + 1 >= len(lines):
        return None
    list_match = re.match(r"^(\s*)-\s*(.*)\s*$", lines[idx + 1])
    if not list_match:
        return None
    indent = list_match.group(1)
    old_val = list_match.group(2).strip()
    lines[idx + 1] = f"{indent}- {desired_value}\n"
    return old_val, desired_value
annotation_documents_tags -contextualization_rate_workflow: "0 0 29 2 *" +contextualization_rate_workflow: "0 7 * * MON" workflow_timeout_per_job: 3600 reservedWordPrefix: Enterprise_ diff --git a/modules/dashboards/rpt_quality/raw/raw.Database.yaml b/modules/dashboards/rpt_quality/raw/raw.Database.yaml new file mode 100644 index 00000000..32b59186 --- /dev/null +++ b/modules/dashboards/rpt_quality/raw/raw.Database.yaml @@ -0,0 +1 @@ +dbName: db_quality_reports \ No newline at end of file diff --git a/modules/packages.toml b/modules/packages.toml index f76d19db..621da9c0 100644 --- a/modules/packages.toml +++ b/modules/packages.toml @@ -87,6 +87,7 @@ modules = [ "accelerators/contextualization/cdf_connection_sql", "accelerators/industrial_tools/cdf_search", "accelerators/open_industrial_data_sync", + "accelerators/quickstart", "sourcesystem/cdf_pi", "sourcesystem/cdf_sap_assets", "sourcesystem/cdf_sap_events", diff --git a/modules/sourcesystem/cdf_pi/workflows/trigger.WorkflowTrigger.yaml b/modules/sourcesystem/cdf_pi/workflows/trigger.WorkflowTrigger.yaml index f78c8097..5f3d6627 100644 --- a/modules/sourcesystem/cdf_pi/workflows/trigger.WorkflowTrigger.yaml +++ b/modules/sourcesystem/cdf_pi/workflows/trigger.WorkflowTrigger.yaml @@ -1,7 +1,7 @@ externalId: {{ workflow }}_trigger triggerRule: triggerType: schedule - cronExpression: "0 4 * * MON" + cronExpression: "0 1 * * MON" workflowExternalId: {{ workflow }} workflowVersion: v1 authentication: diff --git a/modules/sourcesystem/cdf_sap_assets/workflows/trigger.WorkflowTrigger.yaml b/modules/sourcesystem/cdf_sap_assets/workflows/trigger.WorkflowTrigger.yaml index f78c8097..5f3d6627 100644 --- a/modules/sourcesystem/cdf_sap_assets/workflows/trigger.WorkflowTrigger.yaml +++ b/modules/sourcesystem/cdf_sap_assets/workflows/trigger.WorkflowTrigger.yaml @@ -1,7 +1,7 @@ externalId: {{ workflow }}_trigger triggerRule: triggerType: schedule - cronExpression: "0 4 * * MON" + cronExpression: "0 1 * * MON" 
workflowExternalId: {{ workflow }} workflowVersion: v1 authentication: diff --git a/modules/sourcesystem/cdf_sharepoint/workflows/trigger.WorkflowTrigger.yaml b/modules/sourcesystem/cdf_sharepoint/workflows/trigger.WorkflowTrigger.yaml index f78c8097..5f3d6627 100644 --- a/modules/sourcesystem/cdf_sharepoint/workflows/trigger.WorkflowTrigger.yaml +++ b/modules/sourcesystem/cdf_sharepoint/workflows/trigger.WorkflowTrigger.yaml @@ -1,7 +1,7 @@ externalId: {{ workflow }}_trigger triggerRule: triggerType: schedule - cronExpression: "0 4 * * MON" + cronExpression: "0 1 * * MON" workflowExternalId: {{ workflow }} workflowVersion: v1 authentication: diff --git a/packages.zip b/packages.zip new file mode 100644 index 00000000..65d6da56 Binary files /dev/null and b/packages.zip differ