From 0174badabe6f8ea5a5493b13c5e1c1a66ccb4076 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 20 May 2026 04:54:55 +0530 Subject: [PATCH 01/37] feat(modules): ecs_cluster + ecs_service ravion_domains for V2 control plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the Terraform module surface that's the customer-facing entry point for the Ravion domain control plane. Matches the V2 (DI) shape — three separate resources instead of the old bundled `ravion_domain`. compute/ecs_cluster/ravion_domains.tf: - ravion_domain.cluster — wildcard allocation (the cluster's wildcard FQDN) - aws_acm_certificate.cluster — wildcard cert in customer's AWS - ravion_dns_records.cluster_validation — ACM validation CNAMEs into Ravion's Route53 (sync) - ravion_dns_records.cluster_routing — ALIAS to the cluster ALB - aws_acm_certificate_validation — blocks until ACM verifies (~30s) - ravion_managed_certificate.cluster — UI metadata sink compute/ecs_service/ravion_domains.tf: - ravion_domain.auto — child allocation under cluster - ravion_dns_records.auto_routing — service-leaf ALIAS to cluster ALB - aws_lb_listener_rule.ravion — host-header rule on cluster's HTTPS listener. No per-service cert — cluster wildcard covers via SNI. Cluster outputs four values the service module wires up to: - ravion_cluster_domain_allocation_id (the SNI parent) - ravion_cluster_managed_domain_id - ravion_cluster_fqdn - ravion_cluster_certificate_arn Service variables (all default to null, except ravion_listener_rule_priority which defaults to 0 — module is opt-in): - ravion_dns_zone_id - ravion_parent_domain_allocation_id - ravion_cluster_alb_dns_name / _zone_id / _https_listener_arn - ravion_service_slug / _listener_rule_priority versions.tf in both modules declares `ravion = ravion.com/ravion/domains >= 1.0.0`. terraform fmt clean. All AWS resources live in the customer's account; Ravion holds zero customer credentials. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/outputs.tf | 27 ++++++++ compute/ecs_cluster/ravion_domains.tf | 99 +++++++++++++++++++++++++++ compute/ecs_cluster/variables.tf | 16 +++++ compute/ecs_cluster/versions.tf | 6 +- compute/ecs_service/ravion_domains.tf | 81 ++++++++++++++++++++++ compute/ecs_service/variables.tf | 51 ++++++++++++++ compute/ecs_service/versions.tf | 6 +- 7 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 compute/ecs_cluster/ravion_domains.tf create mode 100644 compute/ecs_service/ravion_domains.tf diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index 8d5a5cd..5aef3ef 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -240,3 +240,30 @@ output "region" { description = "The AWS region where the resources are deployed." value = local.region } + +################################################################################ +# Ravion domain control plane outputs +# +# Consumed by sibling ecs_service modules to allocate child FQDNs that +# inherit the cluster's wildcard cert via SNI. +################################################################################ + +output "ravion_cluster_domain_allocation_id" { + description = "DomainAllocation id of the cluster's wildcard. Pass to ecs_service.ravion_parent_domain_allocation_id so service FQDNs sit under the wildcard." + value = local.enable_ravion_domain ? ravion_domain.cluster[0].id : null +} + +output "ravion_cluster_managed_domain_id" { + description = "ManagedDomain id of the cluster's wildcard. The UI links the cluster cert to this." + value = local.enable_ravion_domain ? ravion_domain.cluster[0].managed_domain_id : null +} + +output "ravion_cluster_fqdn" { + description = "Cluster wildcard FQDN, e.g. `*.cluster-abc.ravion.app`." + value = local.enable_ravion_domain ? 
ravion_domain.cluster[0].fqdn : null +} + +output "ravion_cluster_certificate_arn" { + description = "ACM ARN of the cluster's wildcard cert. Use as the listener's default cert or as an extra cert via aws_lb_listener_certificate." + value = local.enable_ravion_domain ? aws_acm_certificate_validation.cluster[0].certificate_arn : null +} diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf new file mode 100644 index 0000000..4cd8a44 --- /dev/null +++ b/compute/ecs_cluster/ravion_domains.tf @@ -0,0 +1,99 @@ +################################################################################ +# Ravion domain control plane — cluster wildcard +# +# Allocates `*.` under Ravion's apex (e.g. `*.-.ravion.app`) +# and issues a wildcard ACM cert covering it. Service modules under this +# cluster create child allocations whose FQDNs sit under , +# so they inherit the wildcard cert via SNI without their own ACM work. +# +# Resources (per the DI design in +# packages/shared-go/domain/domains/DOMAIN_CONTROL_PLANE_DI_DESIGN.md): +# +# ravion_domain.cluster — allocates the wildcard FQDN +# aws_acm_certificate.cluster — issues the cert (customer's AWS account) +# ravion_dns_records.cluster_* — writes the validation + apex routing +# records into Ravion's Route53 (the +# api-go's RavionRoute53Writer) +# aws_acm_certificate_validation — blocks ~30s until ACM verifies +# ravion_managed_certificate.cluster — registers cert metadata at Ravion +# for the UI badge +# +# All AWS resources live in the customer's account, applied by their TF +# runner with their IAM. Ravion never holds customer credentials. +################################################################################ + +locals { + enable_ravion_domain = var.ravion_dns_zone_id != null && var.ravion_dns_zone_id != "" +} + +# 1. Allocate the cluster's wildcard FQDN. +resource "ravion_domain" "cluster" { + count = local.enable_ravion_domain ? 
1 : 0 + dns_zone_id = var.ravion_dns_zone_id + slug = coalesce(var.ravion_cluster_slug, var.name) + wildcard = true +} + +# 2. ACM wildcard cert. Lives in the customer's AWS account. +resource "aws_acm_certificate" "cluster" { + count = local.enable_ravion_domain ? 1 : 0 + + domain_name = ravion_domain.cluster[0].fqdn + validation_method = "DNS" + + lifecycle { + create_before_destroy = true + } + + tags = var.tags +} + +# 3. Validation CNAME(s) into Ravion's Route53. Synchronous — the +# RavionRoute53Writer issues a Route53 ChangeResourceRecordSets call +# inline with our POST and returns when AWS accepts the change. +resource "ravion_dns_records" "cluster_validation" { + count = local.enable_ravion_domain ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [ + for opt in aws_acm_certificate.cluster[0].domain_validation_options : { + name = opt.resource_record_name + type = opt.resource_record_type + value = opt.resource_record_value + ttl = 60 + } + ] +} + +# 4. Apex routing record — wildcard FQDN points at the cluster's public ALB. +# Uses ALIAS so apex-style routing works (Route53 expands to A + AliasTarget). +resource "ravion_dns_records" "cluster_routing" { + count = local.enable_ravion_domain && var.enable_public_alb ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [{ + name = ravion_domain.cluster[0].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = module.public_alb[0].alb_dns_name + zone_id = module.public_alb[0].alb_zone_id + }) + }] +} + +# 5. Block until ACM has validated the cert. With Ravion's Route53 zone +# under our IAM, the validation CNAME goes live in seconds — this step +# typically completes in well under 60s. +resource "aws_acm_certificate_validation" "cluster" { + count = local.enable_ravion_domain ? 1 : 0 + certificate_arn = aws_acm_certificate.cluster[0].arn + validation_record_fqdns = ravion_dns_records.cluster_validation[0].fqdns +} + +# 6. 
Tell Ravion about the cert so the UI shows the cert badge on the +# cluster's domain row. +resource "ravion_managed_certificate" "cluster" { + count = local.enable_ravion_domain ? 1 : 0 + cert_arn = aws_acm_certificate_validation.cluster[0].certificate_arn + status = "ISSUED" + scope = "CLUSTER_WILDCARD" + managed_domain_ids = [ravion_domain.cluster[0].managed_domain_id] +} diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index c7e37d7..e52e0d1 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -629,3 +629,19 @@ variable "region" { description = "AWS region. When null, the provider's configured region is used." default = null } + +################################################################################ +# Ravion domain control plane +################################################################################ + +variable "ravion_dns_zone_id" { + type = string + description = "Ravion DnsZone id (dzn_*) the cluster's wildcard allocation lives under. Typically the platform-owned Ravion apex zone, looked up via the Ravion API or injected by tower-go. When null/empty, the Ravion-domain plumbing is skipped entirely." + default = null +} + +variable "ravion_cluster_slug" { + type = string + description = "Human-readable slug used to derive the cluster's FQDN (`-.ravion.app`). Defaults to var.name when null." 
+ default = null +} diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index bec739b..33c6394 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -12,7 +12,9 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + ravion = { + source = "ravion.com/ravion/domains" + version = ">= 1.0.0" + } } } - - diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf new file mode 100644 index 0000000..1ea52c8 --- /dev/null +++ b/compute/ecs_service/ravion_domains.tf @@ -0,0 +1,81 @@ +################################################################################ +# Ravion domain control plane — service auto-domain +# +# Allocates a child FQDN under the cluster's wildcard (e.g. +# `-.`) so the service inherits the +# cluster's wildcard cert via SNI without needing its own ACM cert. +# +# Resources: +# ravion_domain.auto — child allocation under cluster +# ravion_dns_records.auto_routing — CNAME pointing the FQDN at the +# cluster's public ALB +# aws_lb_listener_rule.ravion — host-header rule routing the FQDN +# to this service's target group +# +# No per-service ACM cert — the cluster's wildcard covers this FQDN. +# No ravion_managed_certificate either — the cluster's +# ravion_managed_certificate already advertises the cert. +################################################################################ + +locals { + ravion_managed = var.ravion_parent_domain_allocation_id != null && var.ravion_parent_domain_allocation_id != "" + ravion_has_listener = var.ravion_cluster_https_listener_arn != null && var.ravion_cluster_https_listener_arn != "" + # Deterministic per-service priority so two services in the same + # cluster don't collide on listener-rule priority. SHA-256 → 16-bit + # hex digest → mod 49000 + offset 1000 to stay clear of the lower + # reserved range. + ravion_priority = var.ravion_listener_rule_priority > 0 ? 
var.ravion_listener_rule_priority : (parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000 +} + +# 1. Allocate the child FQDN under the cluster. +resource "ravion_domain" "auto" { + count = local.ravion_managed ? 1 : 0 + dns_zone_id = var.ravion_dns_zone_id + slug = coalesce(var.ravion_service_slug, var.name) + parent_domain_allocation_id = var.ravion_parent_domain_allocation_id +} + +# 2. Routing CNAME — FQDN points at the cluster's public ALB. The +# cluster's ravion_dns_records.cluster_routing handles the wildcard +# apex; this is the per-service leaf so DNS resolution hits the ALB +# directly. +resource "ravion_dns_records" "auto_routing" { + count = local.ravion_managed ? 1 : 0 + managed_domain_id = ravion_domain.auto[0].id + records = [{ + name = ravion_domain.auto[0].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] +} + +# 3. Listener rule — host-header match routes this service's FQDN to +# the service's target group on the cluster's HTTPS listener. The +# cluster cert covers `*.` so SNI handshake succeeds +# without an explicit aws_lb_listener_certificate attachment. +resource "aws_lb_listener_rule" "ravion" { + count = local.ravion_managed && local.ravion_has_listener ? 1 : 0 + + listener_arn = var.ravion_cluster_https_listener_arn + priority = local.ravion_priority + + condition { + host_header { + values = [ravion_domain.auto[0].fqdn] + } + } + + action { + type = "forward" + target_group_arn = aws_lb_target_group.this[0].arn + } + + lifecycle { + ignore_changes = [action] + } + + tags = var.tags +} diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 13f70cd..a1efc09 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -599,3 +599,54 @@ variable "region" { description = "AWS region. When null, the provider's configured region is used." 
default = null } + +################################################################################ +# Ravion domain control plane +# +# When the parent cluster module is configured with ravion_dns_zone_id, +# pass its outputs into the service module via these variables to +# allocate a child FQDN that inherits the cluster's wildcard cert via +# SNI. Set ravion_parent_domain_allocation_id = null/empty to opt out. +################################################################################ + +variable "ravion_dns_zone_id" { + type = string + description = "Ravion DnsZone id (dzn_*) the allocation lives under. Same value as the cluster's ravion_dns_zone_id." + default = null +} + +variable "ravion_parent_domain_allocation_id" { + type = string + description = "Cluster's DomainAllocation id, from `module.ecs_cluster.ravion_cluster_domain_allocation_id`. When null/empty, no Ravion FQDN is allocated." + default = null +} + +variable "ravion_cluster_alb_dns_name" { + type = string + description = "Cluster ALB DNS name, from `module.ecs_cluster.public_alb_dns_name`. Required when ravion_parent_domain_allocation_id is set." + default = null +} + +variable "ravion_cluster_alb_zone_id" { + type = string + description = "Cluster ALB hosted-zone id, from `module.ecs_cluster.public_alb_zone_id`. Required when ravion_parent_domain_allocation_id is set." + default = null +} + +variable "ravion_cluster_https_listener_arn" { + type = string + description = "Cluster HTTPS listener ARN, from `module.ecs_cluster.public_alb_https_listener_arn`. Required when ravion_parent_domain_allocation_id is set so the host-header rule can be created." + default = null +} + +variable "ravion_service_slug" { + type = string + description = "Human-readable slug used to derive the service FQDN under the cluster wildcard. Defaults to var.name." + default = null +} + +variable "ravion_listener_rule_priority" { + type = number + description = "Explicit listener-rule priority. 
0 → derived deterministically from var.name (sha256-based) so two services in the same cluster don't collide." + default = 0 +} diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index bec739b..33c6394 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -12,7 +12,9 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + ravion = { + source = "ravion.com/ravion/domains" + version = ">= 1.0.0" + } } } - - From f0e497a4ea42dcf20566a9fa1c75d276ce6ddd63 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 20 May 2026 13:25:46 +0530 Subject: [PATCH 02/37] feat: cluster cert-source toggle + service domains list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit compute/ecs_cluster/module.yml gets the certificate source toggle — "Ravion managed (wildcard cert + FQDN auto-allocated)" vs "Bring your own ACM cert". Ravion-managed mode reveals a DNS Zone dropdown (populated via \$values:ravion/dns-zones — picks the platform-managed Ravion apex or any customer-owned zone) and an optional FQDN slug. 
When the cluster is in Ravion-managed mode: - ravion_domain.cluster allocates the cluster's wildcard FQDN under the selected DNS zone - aws_acm_certificate.cluster issues the wildcard cert - ravion_dns_records.cluster_validation lands the ACM validation CNAMEs in the configured zone (synchronous via the RavionRoute53Writer) - aws_acm_certificate_validation blocks ~30s on ACM - The validated cert ARN gets prepended to the ALB listener's certificate_arns (default cert); customer BYO arns can still be added for SNI on custom domains - ravion_managed_certificate.cluster records the metadata for the UI compute/ecs_cluster outputs add: ravion_managed_domains_enabled (bool) ← gates the service module ravion_dns_zone_id ravion_cluster_domain_allocation_id ← parent for service alloc ravion_cluster_managed_domain_id ravion_cluster_fqdn ravion_cluster_certificate_arn compute/ecs_service: - New `ravion_domains` variable (list of slugs, each validated as a DNS-safe label of at most 63 chars). - ravion_domains.tf now iterates the list — one ravion_domain + ravion_dns_records + aws_lb_listener_rule per entry, all keyed by toset(domains) so add/remove is a clean diff. - Listener-rule priorities derived deterministically from sha256("${var.name}:${domain}") so they stay stable across applies and collisions between services in the same cluster are unlikely (not impossible — the hash is folded into the 1000–49999 range). Caller can pin a base via ravion_listener_rule_priority_base. - Empty list = service is reachable via the cluster's apex wildcard only; no per-service FQDN. Same gating as before (ravion_parent_domain_allocation_id null/empty). - Outputs: ravion_domain_fqdns map + ravion_domain_allocation_ids map. No per-service ACM cert — the cluster's wildcard covers every FQDN allocated under it via SNI. (api-go $values endpoint lives in the matching commit on the flightcontrol branch.) 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/load_balancers.tf | 6 +- compute/ecs_cluster/locals.tf | 21 ++++++ compute/ecs_cluster/module.yml | 40 +++++++++++ compute/ecs_cluster/outputs.tf | 10 +++ compute/ecs_cluster/ravion_domains.tf | 6 +- compute/ecs_cluster/variables.tf | 15 ++++- compute/ecs_service/outputs.tf | 14 ++++ compute/ecs_service/ravion_domains.tf | 95 ++++++++++++++++----------- compute/ecs_service/variables.tf | 17 ++++- 9 files changed, 174 insertions(+), 50 deletions(-) diff --git a/compute/ecs_cluster/load_balancers.tf b/compute/ecs_cluster/load_balancers.tf index 9eaec97..4a90630 100644 --- a/compute/ecs_cluster/load_balancers.tf +++ b/compute/ecs_cluster/load_balancers.tf @@ -19,8 +19,10 @@ module "public_alb" { enable_https_listener = var.public_alb_enable_https http_to_https_redirect = var.public_alb_enable_https - # SSL/TLS - certificate_arns = var.public_alb_certificate_arns + # SSL/TLS — when the cluster manages domains via Ravion, the wildcard + # cert issued in ravion_domains.tf is wired in as the default; the + # BYO list is appended for SNI on customer-supplied domains. + certificate_arns = local.public_alb_effective_certificate_arns ssl_policy = var.public_alb_ssl_policy # ALB settings diff --git a/compute/ecs_cluster/locals.tf b/compute/ecs_cluster/locals.tf index 4db1e88..ec2db7b 100644 --- a/compute/ecs_cluster/locals.tf +++ b/compute/ecs_cluster/locals.tf @@ -1,5 +1,26 @@ locals { region = coalesce(var.region, data.aws_region.current.id) + + # Ravion-managed domains gate. When true the cluster allocates a + # wildcard FQDN + issues a wildcard ACM cert in ravion_domains.tf; + # service modules under this cluster inherit the wildcard via SNI. 
+ enable_ravion_domain = ( + var.enable_public_alb && + var.public_alb_enable_https && + var.public_alb_cert_source == "ravion_managed" && + var.ravion_dns_zone_id != null && + var.ravion_dns_zone_id != "" + ) + + # The ALB's HTTPS listener takes a single default cert + N SNI extras. + # Ravion-managed mode puts the wildcard first (default); BYO mode uses + # the customer's list verbatim. Using the validation resource's output + # ensures the listener depends on ACM validation completing. + public_alb_effective_certificate_arns = ( + local.enable_ravion_domain + ? concat([aws_acm_certificate_validation.cluster[0].certificate_arn], var.public_alb_certificate_arns) + : var.public_alb_certificate_arns + ) } ################################################################################ diff --git a/compute/ecs_cluster/module.yml b/compute/ecs_cluster/module.yml index 7232a55..c76fe6d 100644 --- a/compute/ecs_cluster/module.yml +++ b/compute/ecs_cluster/module.yml @@ -324,6 +324,38 @@ input: default: false show_when: enable_public_alb: true + - public_alb_cert_source: + type: string + label: "Certificate Source" + description: "Where the HTTPS listener's default certificate comes from. \"Ravion managed\" allocates a wildcard FQDN under your selected DNS zone and issues a wildcard ACM cert covering services in this cluster. \"Bring your own\" expects you to supply ACM ARNs directly." + default: "ravion_managed" + show_when: + enable_public_alb: true + public_alb_enable_https: true + values: + - value: "ravion_managed" + label: "Ravion managed (wildcard cert + FQDN auto-allocated)" + - value: "byo" + label: "Bring your own ACM cert" + - ravion_dns_zone_id: + type: string + label: "DNS Zone" + description: "Ravion DnsZone the cluster's wildcard FQDN lives under. Choose the platform-managed Ravion apex (default) or a customer-owned zone registered on the DNS Zones settings page." 
+ show_when: + enable_public_alb: true + public_alb_enable_https: true + public_alb_cert_source: "ravion_managed" + validation: + required: true + values: "$values:ravion/dns-zones" + - ravion_cluster_slug: + type: string + label: "FQDN Slug" + description: "Human-readable slug used to derive the cluster's FQDN (-.). Defaults to the cluster name when empty." + show_when: + enable_public_alb: true + public_alb_enable_https: true + public_alb_cert_source: "ravion_managed" - public_alb_certificate_arns: type: list label: "ACM Certificate ARNs" @@ -331,6 +363,7 @@ input: show_when: enable_public_alb: true public_alb_enable_https: true + public_alb_cert_source: "byo" validation: required: true min_length: 1 @@ -515,6 +548,13 @@ output: public_alb_security_group_id: string public_alb_http_listener_arn: string public_alb_https_listener_arn: string + # Ravion managed domains + ravion_managed_domains_enabled: boolean + ravion_cluster_domain_allocation_id: string + ravion_cluster_managed_domain_id: string + ravion_cluster_fqdn: string + ravion_cluster_certificate_arn: string + ravion_dns_zone_id: string # Private ALB private_alb_arn: string private_alb_id: string diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index 5aef3ef..c157a6d 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -248,6 +248,16 @@ output "region" { # inherit the cluster's wildcard cert via SNI. ################################################################################ +output "ravion_managed_domains_enabled" { + description = "True when this cluster's HTTPS listener uses a Ravion-managed wildcard cert. Service modules under the cluster use this to decide whether to allocate child FQDNs + create host-header listener rules." + value = local.enable_ravion_domain +} + +output "ravion_dns_zone_id" { + description = "DnsZone id the cluster's wildcard lives under. Passes through to service modules so they allocate under the same zone." 
+ value = local.enable_ravion_domain ? var.ravion_dns_zone_id : null +} + output "ravion_cluster_domain_allocation_id" { description = "DomainAllocation id of the cluster's wildcard. Pass to ecs_service.ravion_parent_domain_allocation_id so service FQDNs sit under the wildcard." value = local.enable_ravion_domain ? ravion_domain.cluster[0].id : null diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 4cd8a44..8474e44 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -22,11 +22,9 @@ # runner with their IAM. Ravion never holds customer credentials. ################################################################################ -locals { - enable_ravion_domain = var.ravion_dns_zone_id != null && var.ravion_dns_zone_id != "" -} - # 1. Allocate the cluster's wildcard FQDN. +# The local.enable_ravion_domain gate lives in locals.tf next to the +# ALB-cert-source toggle since both branches need to agree. resource "ravion_domain" "cluster" { count = local.enable_ravion_domain ? 1 : 0 dns_zone_id = var.ravion_dns_zone_id diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index e52e0d1..3030d75 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -634,14 +634,25 @@ variable "region" { # Ravion domain control plane ################################################################################ +variable "public_alb_cert_source" { + type = string + description = "Where the public ALB's default HTTPS certificate comes from. `ravion_managed` allocates a wildcard FQDN under ravion_dns_zone_id and issues a wildcard ACM cert that the listener uses as its default; service modules under this cluster inherit the wildcard via SNI. `byo` expects public_alb_certificate_arns instead." 
+ default = "ravion_managed" + + validation { + condition = contains(["ravion_managed", "byo"], var.public_alb_cert_source) + error_message = "public_alb_cert_source must be one of: ravion_managed, byo." + } +} + variable "ravion_dns_zone_id" { type = string - description = "Ravion DnsZone id (dzn_*) the cluster's wildcard allocation lives under. Typically the platform-owned Ravion apex zone, looked up via the Ravion API or injected by tower-go. When null/empty, the Ravion-domain plumbing is skipped entirely." + description = "Ravion DnsZone id (dzn_*) the cluster's wildcard allocation lives under. Required when public_alb_cert_source = \"ravion_managed\". Pick the platform-owned Ravion apex zone or a customer-owned zone registered on the DNS Zones settings page." default = null } variable "ravion_cluster_slug" { type = string - description = "Human-readable slug used to derive the cluster's FQDN (`-.ravion.app`). Defaults to var.name when null." + description = "Human-readable slug used to derive the cluster's FQDN (`-.`). Defaults to var.name when null." default = null } diff --git a/compute/ecs_service/outputs.tf b/compute/ecs_service/outputs.tf index a21ab18..537f04e 100644 --- a/compute/ecs_service/outputs.tf +++ b/compute/ecs_service/outputs.tf @@ -249,3 +249,17 @@ output "region" { } + +################################################################################ +# Ravion domain control plane outputs +################################################################################ + +output "ravion_domain_fqdns" { + description = "Map of (domain slug → resolved FQDN) for every allocation made under the cluster wildcard. Empty when ravion_managed = false or var.ravion_domains is empty." + value = { for slug, alloc in ravion_domain.this : slug => alloc.fqdn } +} + +output "ravion_domain_allocation_ids" { + description = "Map of (domain slug → DomainAllocation id) so downstream resources (e.g. 
a ravion_managed_certificate for a SAN cert) can reference each allocation." + value = { for slug, alloc in ravion_domain.this : slug => alloc.id } +} diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 1ea52c8..3000df9 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -1,49 +1,66 @@ ################################################################################ -# Ravion domain control plane — service auto-domain +# Ravion domain control plane — per-service domain allocations # -# Allocates a child FQDN under the cluster's wildcard (e.g. -# `-.`) so the service inherits the -# cluster's wildcard cert via SNI without needing its own ACM cert. +# When the parent cluster module is configured for Ravion-managed +# domains (`module.ecs_cluster.ravion_managed_domains_enabled`), each +# entry in var.ravion_domains gets: # -# Resources: -# ravion_domain.auto — child allocation under cluster -# ravion_dns_records.auto_routing — CNAME pointing the FQDN at the -# cluster's public ALB -# aws_lb_listener_rule.ravion — host-header rule routing the FQDN -# to this service's target group +# ravion_domain.this[d] — child allocation under the cluster +# ravion_dns_records.this[d] — CNAME pointing the FQDN at the ALB +# aws_lb_listener_rule.ravion[d] — host-header rule on the cluster's +# HTTPS listener # -# No per-service ACM cert — the cluster's wildcard covers this FQDN. -# No ravion_managed_certificate either — the cluster's -# ravion_managed_certificate already advertises the cert. +# No per-domain ACM cert — the cluster's wildcard covers every FQDN +# allocated under it via SNI. No ravion_managed_certificate either — +# the cluster's ravion_managed_certificate already advertises the cert. +# +# Empty var.ravion_domains = the service is reachable via the cluster's +# apex wildcard only; nothing is allocated here. 
################################################################################ locals { - ravion_managed = var.ravion_parent_domain_allocation_id != null && var.ravion_parent_domain_allocation_id != "" - ravion_has_listener = var.ravion_cluster_https_listener_arn != null && var.ravion_cluster_https_listener_arn != "" - # Deterministic per-service priority so two services in the same - # cluster don't collide on listener-rule priority. SHA-256 → 16-bit - # hex digest → mod 49000 + offset 1000 to stay clear of the lower - # reserved range. - ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : (parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000 + ravion_managed = ( + var.ravion_parent_domain_allocation_id != null && + var.ravion_parent_domain_allocation_id != "" + ) + ravion_has_listener = ( + var.ravion_cluster_https_listener_arn != null && + var.ravion_cluster_https_listener_arn != "" + ) + ravion_domain_set = local.ravion_managed ? toset(var.ravion_domains) : toset([]) + + # Deterministic per-(service, domain) priority so two services in the + # same cluster don't collide on listener-rule priority. SHA-256 → + # 16-bit hex digest → mod 49000 + offset 1000 to stay clear of the + # lower reserved range. When the caller pins a base, sort the domain + # slugs and assign +0, +1, +2, ... so re-applies stay stable. + ravion_sorted_domains = sort(var.ravion_domains) + ravion_priority_for_domain = { + for idx, d in local.ravion_sorted_domains : + d => ( + var.ravion_listener_rule_priority_base > 0 + ? var.ravion_listener_rule_priority_base + idx + : (parseint(substr(sha256("${var.name}:${d}"), 0, 4), 16) % 49000) + 1000 + ) + } } -# 1. Allocate the child FQDN under the cluster. -resource "ravion_domain" "auto" { - count = local.ravion_managed ? 1 : 0 +# 1. Allocate one child FQDN per entry in var.ravion_domains. 
+resource "ravion_domain" "this" { + for_each = local.ravion_domain_set + dns_zone_id = var.ravion_dns_zone_id - slug = coalesce(var.ravion_service_slug, var.name) + slug = each.value parent_domain_allocation_id = var.ravion_parent_domain_allocation_id } -# 2. Routing CNAME — FQDN points at the cluster's public ALB. The -# cluster's ravion_dns_records.cluster_routing handles the wildcard -# apex; this is the per-service leaf so DNS resolution hits the ALB -# directly. -resource "ravion_dns_records" "auto_routing" { - count = local.ravion_managed ? 1 : 0 - managed_domain_id = ravion_domain.auto[0].id +# 2. Routing CNAME — each FQDN points at the cluster's public ALB. +resource "ravion_dns_records" "this" { + for_each = local.ravion_domain_set + + managed_domain_id = ravion_domain.this[each.value].id records = [{ - name = ravion_domain.auto[0].fqdn + name = ravion_domain.this[each.value].fqdn type = "ALIAS" value = jsonencode({ dns_name = var.ravion_cluster_alb_dns_name @@ -52,19 +69,19 @@ resource "ravion_dns_records" "auto_routing" { }] } -# 3. Listener rule — host-header match routes this service's FQDN to -# the service's target group on the cluster's HTTPS listener. The -# cluster cert covers `*.` so SNI handshake succeeds -# without an explicit aws_lb_listener_certificate attachment. +# 3. Listener rule — host-header match routes each FQDN to this service's +# target group on the cluster's HTTPS listener. The cluster cert covers +# `*.` so SNI handshake succeeds without an explicit +# aws_lb_listener_certificate attachment. resource "aws_lb_listener_rule" "ravion" { - count = local.ravion_managed && local.ravion_has_listener ? 1 : 0 + for_each = local.ravion_has_listener ? 
local.ravion_domain_set : toset([]) listener_arn = var.ravion_cluster_https_listener_arn - priority = local.ravion_priority + priority = local.ravion_priority_for_domain[each.value] condition { host_header { - values = [ravion_domain.auto[0].fqdn] + values = [ravion_domain.this[each.value].fqdn] } } diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index a1efc09..38d5cbf 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -641,12 +641,23 @@ variable "ravion_cluster_https_listener_arn" { variable "ravion_service_slug" { type = string - description = "Human-readable slug used to derive the service FQDN under the cluster wildcard. Defaults to var.name." + description = "Human-readable slug used to derive the service FQDN under the cluster wildcard. Defaults to var.name. Applied to every domain in ravion_domains (each gets its own `-.` allocation)." default = null } -variable "ravion_listener_rule_priority" { +variable "ravion_domains" { + type = list(string) + description = "Domain slugs to allocate under the cluster wildcard. Each entry becomes a child DomainAllocation, a routing record (FQDN → cluster ALB), and a host-header listener rule. Empty list (default) means the service is reachable only via the cluster's apex wildcard; no per-service FQDN is allocated. Each entry is a slug — Ravion derives `-.` deterministically so re-applies converge." + default = [] + + validation { + condition = alltrue([for d in var.ravion_domains : can(regex("^[a-z0-9]([a-z0-9-]*[a-z0-9])?$", d)) && length(d) <= 63]) + error_message = "Each ravion_domains entry must be a DNS-safe slug ([a-z0-9-], up to 63 chars)." + } +} + +variable "ravion_listener_rule_priority_base" { type = number - description = "Explicit listener-rule priority. 0 → derived deterministically from var.name (sha256-based) so two services in the same cluster don't collide." + description = "Base for the per-domain listener-rule priority. 
0 → derived deterministically from (var.name, domain-slug) via sha256 so two services in the same cluster don't collide. Non-zero values are used as-is for the first domain; subsequent domains increment by 1." default = 0 } From 6dbc42cc02de099cc38b330db3bfe1354eff6845 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Tue, 19 May 2026 21:56:28 +0530 Subject: [PATCH 03/37] Revert try(..., true) removals from variables.tf files Restores the validation conditions in compute/{autoscaling,ecs_cluster,ecs_service} and networking/{alb,nlb} variables.tf to their pre-03f87c3 state. The other changes in 03f87c3 (Ravion module.yml + listener wiring fixes) remain. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/autoscaling/variables.tf | 128 ++++++++++++++++++++++--------- compute/ecs_cluster/variables.tf | 12 +-- compute/ecs_service/variables.tf | 6 +- networking/alb/variables.tf | 6 +- networking/nlb/variables.tf | 8 +- 5 files changed, 108 insertions(+), 52 deletions(-) diff --git a/compute/autoscaling/variables.tf b/compute/autoscaling/variables.tf index 7bce8be..edc1337 100644 --- a/compute/autoscaling/variables.tf +++ b/compute/autoscaling/variables.tf @@ -50,7 +50,7 @@ variable "desired_capacity" { default = null validation { - condition = var.desired_capacity == null || var.desired_capacity >= 0 + condition = try(var.desired_capacity == null || var.desired_capacity >= 0, true) error_message = "The desired_capacity must be null or 0 or greater." } } @@ -91,7 +91,7 @@ variable "default_instance_warmup" { default = null validation { - condition = var.default_instance_warmup == null || var.default_instance_warmup >= 0 + condition = try(var.default_instance_warmup == null || var.default_instance_warmup >= 0, true) error_message = "The default_instance_warmup must be null or 0 or greater." 
} } @@ -118,7 +118,7 @@ variable "max_instance_lifetime" { default = null validation { - condition = var.max_instance_lifetime == null || var.max_instance_lifetime == 0 || (var.max_instance_lifetime >= 86400 && var.max_instance_lifetime <= 31536000) + condition = try(var.max_instance_lifetime == null || var.max_instance_lifetime == 0 || (var.max_instance_lifetime >= 86400 && var.max_instance_lifetime <= 31536000), true) error_message = "The max_instance_lifetime must be null, 0, or between 86400 (1 day) and 31536000 (365 days)." } } @@ -238,7 +238,7 @@ variable "service_linked_role_arn" { default = null validation { - condition = var.service_linked_role_arn == null || can(regex("^arn:aws:iam::", var.service_linked_role_arn)) + condition = try(var.service_linked_role_arn == null || can(regex("^arn:aws:iam::", var.service_linked_role_arn)), true) error_message = "The service_linked_role_arn must be null or a valid IAM role ARN starting with 'arn:aws:iam::'." } } @@ -297,7 +297,7 @@ variable "launch_template_id" { default = null validation { - condition = var.launch_template_id == null || can(regex("^lt-", var.launch_template_id)) + condition = try(var.launch_template_id == null || can(regex("^lt-", var.launch_template_id)), true) error_message = "The launch_template_id must be null or a valid launch template ID starting with 'lt-'." } } @@ -594,36 +594,44 @@ variable "mixed_instances_policy" { default = null validation { - condition = var.mixed_instances_policy == null || ( + condition = try( +var.mixed_instances_policy == null || ( var.mixed_instances_policy.instances_distribution == null || contains(["prioritized", "lowest-price"], coalesce(var.mixed_instances_policy.instances_distribution.on_demand_allocation_strategy, "prioritized")) ) + , true) error_message = "The on_demand_allocation_strategy must be 'prioritized' or 'lowest-price'." 
} validation { - condition = var.mixed_instances_policy == null || ( + condition = try( +var.mixed_instances_policy == null || ( var.mixed_instances_policy.instances_distribution == null || contains(["capacity-optimized", "capacity-optimized-prioritized", "lowest-price", "price-capacity-optimized"], coalesce(var.mixed_instances_policy.instances_distribution.spot_allocation_strategy, "capacity-optimized")) ) + , true) error_message = "The spot_allocation_strategy must be 'capacity-optimized', 'capacity-optimized-prioritized', 'lowest-price', or 'price-capacity-optimized'." } validation { - condition = var.mixed_instances_policy == null || ( + condition = try( +var.mixed_instances_policy == null || ( var.mixed_instances_policy.instances_distribution == null || ( coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) >= 0 && coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) <= 100 ) ) + , true) error_message = "The on_demand_percentage_above_base_capacity must be between 0 and 100." } validation { - condition = var.mixed_instances_policy == null || ( + condition = try( +var.mixed_instances_policy == null || ( var.mixed_instances_policy.instances_distribution == null || coalesce(var.mixed_instances_policy.instances_distribution.on_demand_base_capacity, 0) >= 0 ) + , true) error_message = "The on_demand_base_capacity must be 0 or greater." } } @@ -678,47 +686,59 @@ variable "instance_refresh" { default = null validation { - condition = var.instance_refresh == null || ( + condition = try( +var.instance_refresh == null || ( contains(["Rolling"], coalesce(var.instance_refresh.strategy, "Rolling")) ) + , true) error_message = "The instance_refresh strategy must be 'Rolling'." 
} validation { - condition = var.instance_refresh == null || var.instance_refresh.preferences == null || ( + condition = try( +var.instance_refresh == null || var.instance_refresh.preferences == null || ( coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) >= 0 && coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) <= 100 ) + , true) error_message = "The min_healthy_percentage must be between 0 and 100." } validation { - condition = var.instance_refresh == null || var.instance_refresh.preferences == null || ( + condition = try( +var.instance_refresh == null || var.instance_refresh.preferences == null || ( coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) >= 100 && coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) <= 200 ) + , true) error_message = "The max_healthy_percentage must be between 100 and 200." } validation { - condition = var.instance_refresh == null || var.instance_refresh.preferences == null || ( + condition = try( +var.instance_refresh == null || var.instance_refresh.preferences == null || ( contains(["Refresh", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.scale_in_protected_instances, "Ignore")) ) + , true) error_message = "The scale_in_protected_instances must be 'Refresh', 'Ignore', or 'Wait'." } validation { - condition = var.instance_refresh == null || var.instance_refresh.preferences == null || ( + condition = try( +var.instance_refresh == null || var.instance_refresh.preferences == null || ( contains(["Terminate", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.standby_instances, "Ignore")) ) + , true) error_message = "The standby_instances must be 'Terminate', 'Ignore', or 'Wait'." 
} validation { - condition = var.instance_refresh == null || var.instance_refresh.preferences == null || ( + condition = try( +var.instance_refresh == null || var.instance_refresh.preferences == null || ( var.instance_refresh.preferences.checkpoint_percentages == null || alltrue([for p in var.instance_refresh.preferences.checkpoint_percentages : p >= 0 && p <= 100]) ) + , true) error_message = "All checkpoint_percentages must be between 0 and 100." } } @@ -752,24 +772,30 @@ variable "warm_pool" { default = null validation { - condition = var.warm_pool == null || ( + condition = try( +var.warm_pool == null || ( contains(["Stopped", "Running", "Hibernated"], coalesce(var.warm_pool.pool_state, "Stopped")) ) + , true) error_message = "The pool_state must be 'Stopped', 'Running', or 'Hibernated'." } validation { - condition = var.warm_pool == null || ( + condition = try( +var.warm_pool == null || ( coalesce(var.warm_pool.min_size, 0) >= 0 ) + , true) error_message = "The warm_pool min_size must be 0 or greater." } validation { - condition = var.warm_pool == null || ( + condition = try( +var.warm_pool == null || ( var.warm_pool.max_group_prepared_capacity == null || var.warm_pool.max_group_prepared_capacity >= 0 ) + , true) error_message = "The max_group_prepared_capacity must be null or 0 or greater." } } @@ -834,18 +860,22 @@ variable "lifecycle_hooks" { } validation { - condition = alltrue([ + condition = try( +alltrue([ for hook in var.lifecycle_hooks : hook.notification_target_arn == null || can(regex("^arn:aws:(sns|sqs):", hook.notification_target_arn)) ]) + , true) error_message = "Each notification_target_arn must be null or a valid SNS topic or SQS queue ARN." } validation { - condition = alltrue([ + condition = try( +alltrue([ for hook in var.lifecycle_hooks : hook.role_arn == null || can(regex("^arn:aws:iam::", hook.role_arn)) ]) + , true) error_message = "Each role_arn must be null or a valid IAM role ARN." 
} } @@ -1087,23 +1117,28 @@ variable "scaling_policies" { } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.adjustment_type == null || contains(["ChangeInCapacity", "ExactCapacity", "PercentChangeInCapacity"], policy.adjustment_type) ]) + , true) error_message = "Each adjustment_type must be 'ChangeInCapacity', 'ExactCapacity', or 'PercentChangeInCapacity'." } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.metric_aggregation_type == null || contains(["Minimum", "Maximum", "Average"], policy.metric_aggregation_type) ]) + , true) error_message = "Each metric_aggregation_type must be 'Minimum', 'Maximum', or 'Average'." } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.target_tracking_configuration == null || policy.target_tracking_configuration.predefined_metric_specification == null || @@ -1112,44 +1147,53 @@ variable "scaling_policies" { policy.target_tracking_configuration.predefined_metric_specification.predefined_metric_type ) ]) + , true) error_message = "Each predefined_metric_type for target tracking must be 'ASGAverageCPUUtilization', 'ASGAverageNetworkIn', 'ASGAverageNetworkOut', or 'ALBRequestCountPerTarget'." } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.predictive_scaling_configuration == null || contains(["ForecastAndScale", "ForecastOnly"], coalesce(policy.predictive_scaling_configuration.mode, "ForecastOnly")) ]) + , true) error_message = "Each predictive scaling mode must be 'ForecastAndScale' or 'ForecastOnly'." 
} validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.predictive_scaling_configuration == null || contains(["IncreaseMaxCapacity", "HonorMaxCapacity"], coalesce(policy.predictive_scaling_configuration.max_capacity_breach_behavior, "HonorMaxCapacity")) ]) + , true) error_message = "Each max_capacity_breach_behavior must be 'IncreaseMaxCapacity' or 'HonorMaxCapacity'." } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.predictive_scaling_configuration == null || policy.predictive_scaling_configuration.scheduling_buffer_time == null || (policy.predictive_scaling_configuration.scheduling_buffer_time >= 0 && policy.predictive_scaling_configuration.scheduling_buffer_time <= 3600) ]) + , true) error_message = "Each scheduling_buffer_time must be between 0 and 3600 seconds." } validation { - condition = alltrue([ + condition = try( +alltrue([ for policy in var.scaling_policies : policy.predictive_scaling_configuration == null || policy.predictive_scaling_configuration.max_capacity_buffer == null || (policy.predictive_scaling_configuration.max_capacity_buffer >= 0 && policy.predictive_scaling_configuration.max_capacity_buffer <= 100) ]) + , true) error_message = "Each max_capacity_buffer must be between 0 and 100." } } @@ -1181,14 +1225,17 @@ variable "notifications" { default = null validation { - condition = var.notifications == null || ( + condition = try( +var.notifications == null || ( can(regex("^arn:aws:sns:", var.notifications.topic_arn)) ) + , true) error_message = "The topic_arn must be a valid SNS topic ARN starting with 'arn:aws:sns:'." 
} validation { - condition = var.notifications == null || ( + condition = try( +var.notifications == null || ( alltrue([ for notification in coalesce(var.notifications.notifications, []) : contains([ @@ -1200,6 +1247,7 @@ variable "notifications" { ], notification) ]) ) + , true) error_message = "Each notification must be one of: 'autoscaling:EC2_INSTANCE_LAUNCH', 'autoscaling:EC2_INSTANCE_LAUNCH_ERROR', 'autoscaling:EC2_INSTANCE_TERMINATE', 'autoscaling:EC2_INSTANCE_TERMINATE_ERROR', 'autoscaling:TEST_NOTIFICATION'." } } @@ -1294,26 +1342,32 @@ variable "schedules" { } validation { - condition = alltrue([ + condition = try( +alltrue([ for schedule in var.schedules : schedule.min_size == null || schedule.min_size >= 0 ]) + , true) error_message = "Each schedule min_size must be null or 0 or greater." } validation { - condition = alltrue([ + condition = try( +alltrue([ for schedule in var.schedules : schedule.max_size == null || schedule.max_size >= 1 ]) + , true) error_message = "Each schedule max_size must be null or at least 1." } validation { - condition = alltrue([ + condition = try( +alltrue([ for schedule in var.schedules : schedule.desired_capacity == null || schedule.desired_capacity >= 0 ]) + , true) error_message = "Each schedule desired_capacity must be null or 0 or greater." } } @@ -1336,17 +1390,19 @@ variable "instance_maintenance_policy" { default = null validation { - condition = var.instance_maintenance_policy == null || ( + condition = try( coalesce(var.instance_maintenance_policy.min_healthy_percentage, 90) >= 0 && - coalesce(var.instance_maintenance_policy.min_healthy_percentage, 90) <= 100 + coalesce(var.instance_maintenance_policy.min_healthy_percentage, 90) <= 100, + true ) error_message = "The min_healthy_percentage must be between 0 and 100." 
} validation { - condition = var.instance_maintenance_policy == null || ( + condition = try( coalesce(var.instance_maintenance_policy.max_healthy_percentage, 120) >= 100 && - coalesce(var.instance_maintenance_policy.max_healthy_percentage, 120) <= 200 + coalesce(var.instance_maintenance_policy.max_healthy_percentage, 120) <= 200, + true ) error_message = "The max_healthy_percentage must be between 100 and 200." } diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index 3030d75..2ff616f 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -169,7 +169,7 @@ variable "ec2_ami_id" { default = null validation { - condition = var.ec2_ami_id == null || can(regex("^ami-", var.ec2_ami_id)) + condition = try(var.ec2_ami_id == null || can(regex("^ami-", var.ec2_ami_id)), true) error_message = "The ec2_ami_id must be a valid AMI ID starting with 'ami-'." } } @@ -414,7 +414,7 @@ variable "public_alb_access_logs_bucket_arn" { default = null validation { - condition = var.public_alb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.public_alb_access_logs_bucket_arn)) + condition = try(var.public_alb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.public_alb_access_logs_bucket_arn)), true) error_message = "The public_alb_access_logs_bucket_arn must be a valid S3 bucket ARN." } } @@ -425,7 +425,7 @@ variable "public_alb_web_acl_arn" { default = null validation { - condition = var.public_alb_web_acl_arn == null || can(regex("^arn:aws:wafv2:", var.public_alb_web_acl_arn)) + condition = try(var.public_alb_web_acl_arn == null || can(regex("^arn:aws:wafv2:", var.public_alb_web_acl_arn)), true) error_message = "The public_alb_web_acl_arn must be a valid WAFv2 Web ACL ARN." 
} } @@ -497,7 +497,7 @@ variable "private_alb_access_logs_bucket_arn" { default = null validation { - condition = var.private_alb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.private_alb_access_logs_bucket_arn)) + condition = try(var.private_alb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.private_alb_access_logs_bucket_arn)), true) error_message = "The private_alb_access_logs_bucket_arn must be a valid S3 bucket ARN." } } @@ -541,7 +541,7 @@ variable "public_nlb_access_logs_bucket_arn" { default = null validation { - condition = var.public_nlb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.public_nlb_access_logs_bucket_arn)) + condition = try(var.public_nlb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.public_nlb_access_logs_bucket_arn)), true) error_message = "The public_nlb_access_logs_bucket_arn must be a valid S3 bucket ARN." } } @@ -602,7 +602,7 @@ variable "private_nlb_access_logs_bucket_arn" { default = null validation { - condition = var.private_nlb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.private_nlb_access_logs_bucket_arn)) + condition = try(var.private_nlb_access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.private_nlb_access_logs_bucket_arn)), true) error_message = "The private_nlb_access_logs_bucket_arn must be a valid S3 bucket ARN." } } diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 38d5cbf..4268c44 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -201,7 +201,7 @@ variable "execution_role_arn" { default = null validation { - condition = var.execution_role_arn == null || can(regex("^arn:aws:iam::", var.execution_role_arn)) + condition = try(var.execution_role_arn == null || can(regex("^arn:aws:iam::", var.execution_role_arn)), true) error_message = "The execution_role_arn must be a valid IAM role ARN." 
} } @@ -212,7 +212,7 @@ variable "task_role_arn" { default = null validation { - condition = var.task_role_arn == null || can(regex("^arn:aws:iam::", var.task_role_arn)) + condition = try(var.task_role_arn == null || can(regex("^arn:aws:iam::", var.task_role_arn)), true) error_message = "The task_role_arn must be a valid IAM role ARN." } } @@ -376,7 +376,7 @@ variable "load_balancer_security_group_id" { default = null validation { - condition = var.load_balancer_security_group_id == null || can(regex("^sg-", var.load_balancer_security_group_id)) + condition = try(var.load_balancer_security_group_id == null || can(regex("^sg-", var.load_balancer_security_group_id)), true) error_message = "The load_balancer_security_group_id must be a valid security group ID starting with 'sg-'." } } diff --git a/networking/alb/variables.tf b/networking/alb/variables.tf index f3118fd..da1b8aa 100644 --- a/networking/alb/variables.tf +++ b/networking/alb/variables.tf @@ -259,7 +259,7 @@ variable "access_logs_bucket_arn" { default = null validation { - condition = var.access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.access_logs_bucket_arn)) + condition = try(var.access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.access_logs_bucket_arn)), true) error_message = "The access_logs_bucket_arn must be a valid S3 bucket ARN." } } @@ -287,7 +287,7 @@ variable "access_logs_kms_key_id" { default = null validation { - condition = var.access_logs_kms_key_id == null || can(regex("^(arn:aws:kms:|alias/)", var.access_logs_kms_key_id)) + condition = try(var.access_logs_kms_key_id == null || can(regex("^(arn:aws:kms:|alias/)", var.access_logs_kms_key_id)), true) error_message = "The access_logs_kms_key_id must be a valid KMS key ARN or alias." 
} } @@ -314,7 +314,7 @@ variable "web_acl_arn" { default = null validation { - condition = var.web_acl_arn == null || can(regex("^arn:aws:wafv2:", var.web_acl_arn)) + condition = try(var.web_acl_arn == null || can(regex("^arn:aws:wafv2:", var.web_acl_arn)), true) error_message = "The web_acl_arn must be a valid WAFv2 Web ACL ARN." } } diff --git a/networking/nlb/variables.tf b/networking/nlb/variables.tf index a0f02b7..f2996cc 100644 --- a/networking/nlb/variables.tf +++ b/networking/nlb/variables.tf @@ -130,7 +130,7 @@ variable "dns_record_client_routing_policy" { default = null validation { - condition = var.dns_record_client_routing_policy == null || contains(["any_availability_zone", "availability_zone_affinity", "partial_availability_zone_affinity"], var.dns_record_client_routing_policy) + condition = try(contains(["any_availability_zone", "availability_zone_affinity", "partial_availability_zone_affinity"], var.dns_record_client_routing_policy), true) error_message = "The dns_record_client_routing_policy must be 'any_availability_zone', 'availability_zone_affinity', or 'partial_availability_zone_affinity'." } } @@ -141,7 +141,7 @@ variable "enforce_security_group_inbound_rules_on_private_link_traffic" { default = null validation { - condition = var.enforce_security_group_inbound_rules_on_private_link_traffic == null || contains(["on", "off"], var.enforce_security_group_inbound_rules_on_private_link_traffic) + condition = try(contains(["on", "off"], var.enforce_security_group_inbound_rules_on_private_link_traffic), true) error_message = "The enforce_security_group_inbound_rules_on_private_link_traffic must be 'on' or 'off'." 
} } @@ -183,7 +183,7 @@ variable "access_logs_bucket_arn" { default = null validation { - condition = var.access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.access_logs_bucket_arn)) + condition = try(var.access_logs_bucket_arn == null || can(regex("^arn:aws:s3:::", var.access_logs_bucket_arn)), true) error_message = "The access_logs_bucket_arn must be a valid S3 bucket ARN." } } @@ -211,7 +211,7 @@ variable "access_logs_kms_key_id" { default = null validation { - condition = var.access_logs_kms_key_id == null || can(regex("^(arn:aws:kms:|alias/)", var.access_logs_kms_key_id)) + condition = try(var.access_logs_kms_key_id == null || can(regex("^(arn:aws:kms:|alias/)", var.access_logs_kms_key_id)), true) error_message = "The access_logs_kms_key_id must be a valid KMS key ARN or alias." } } From 0da47ca4288787f87f0263edd487a9eef86fd31d Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 20 May 2026 22:53:04 +0530 Subject: [PATCH 04/37] ecs_service: wrap null-tolerant validations in try() for TF 1.10 --- compute/ecs_service/locals.tf | 2 +- compute/ecs_service/variables.tf | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compute/ecs_service/locals.tf b/compute/ecs_service/locals.tf index c0fe0ad..d741dc9 100644 --- a/compute/ecs_service/locals.tf +++ b/compute/ecs_service/locals.tf @@ -105,7 +105,7 @@ locals { ]) # Auto scaling settings - enable_auto_scaling = var.auto_scaling != null && var.auto_scaling.enabled + enable_auto_scaling = try(var.auto_scaling.enabled, false) # Service discovery settings enable_service_discovery = var.service_discovery != null diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 4268c44..8b42df7 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -458,19 +458,19 @@ variable "load_balancer_attachment" { default = null validation { - condition = var.load_balancer_attachment == null || contains( + condition = try(contains( 
["HTTP", "HTTPS", "TCP", "UDP", "TLS", "TCP_UDP", "GENEVE"], var.load_balancer_attachment.target_group.protocol - ) + ), true) error_message = "The protocol must be one of: HTTP, HTTPS (for ALB), or TCP, UDP, TLS, TCP_UDP, GENEVE (for NLB/GWLB)." } validation { - condition = var.load_balancer_attachment == null || var.load_balancer_attachment.target_group.stickiness == null || ( + condition = try(var.load_balancer_attachment.target_group.stickiness == null || ( contains(["HTTP", "HTTPS"], var.load_balancer_attachment.target_group.protocol) ? contains(["lb_cookie", "app_cookie"], var.load_balancer_attachment.target_group.stickiness.type) : var.load_balancer_attachment.target_group.stickiness.type == "source_ip" - ) + ), true) error_message = "Stickiness type must be 'lb_cookie' or 'app_cookie' for ALB (HTTP/HTTPS), or 'source_ip' for NLB (TCP/UDP/TLS)." } } From a701eb62087780c2516436cd351267d03133a073 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 20 May 2026 22:57:01 +0530 Subject: [PATCH 05/37] ecs_service/locals: wrap remaining null-attribute access in try() --- compute/ecs_service/locals.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compute/ecs_service/locals.tf b/compute/ecs_service/locals.tf index d741dc9..13e309c 100644 --- a/compute/ecs_service/locals.tf +++ b/compute/ecs_service/locals.tf @@ -19,10 +19,10 @@ locals { deployment_controller_type = var.deployment_type == "blue_green" ? 
"CODE_DEPLOY" : "ECS" # Determine if load balancer is configured - enable_load_balancer = var.load_balancer_attachment != null && var.load_balancer_attachment.enabled + enable_load_balancer = try(var.load_balancer_attachment.enabled, false) # Determine if NLB listener should be created (vs ALB listener rules) - enable_nlb_listener = local.enable_load_balancer && var.load_balancer_attachment.nlb_listener != null + enable_nlb_listener = local.enable_load_balancer && try(var.load_balancer_attachment.nlb_listener, null) != null # Placeholder container name and port placeholder_container_name = "app" From 140000c3bf33f3550dcc48df4108253154ae300b Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 20 May 2026 23:03:18 +0530 Subject: [PATCH 06/37] ecs_service/auto_scaling: try() around null var.auto_scaling.scheduled --- compute/ecs_service/auto_scaling.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute/ecs_service/auto_scaling.tf b/compute/ecs_service/auto_scaling.tf index a52651b..f5d10c0 100644 --- a/compute/ecs_service/auto_scaling.tf +++ b/compute/ecs_service/auto_scaling.tf @@ -71,7 +71,7 @@ resource "aws_appautoscaling_policy" "target_tracking" { ################################################################################ resource "aws_appautoscaling_scheduled_action" "this" { - for_each = local.enable_auto_scaling && var.auto_scaling.scheduled != null ? { + for_each = local.enable_auto_scaling && try(var.auto_scaling.scheduled, null) != null ? { for action in var.auto_scaling.scheduled : action.name => action } : {} From 06787c55761fb09476976f276e2445c10f5f4c87 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 02:38:45 +0530 Subject: [PATCH 07/37] feat: Update ECS cluster and service modules for Ravion DNS provider integration - Added support for the new Ravion DNS provider in both cluster and service modules. 
- Replaced `ravion_dns_zone_id` with `ravion_dns_provider_id` and `ravion_dns_provider_given_id` for improved flexibility. - Updated local variables and data sources to handle DNS provider lookups and routing logic. - Enhanced routing record management for Route53 and Cloudflare based on the selected DNS provider. - Adjusted ACM certificate issuance and validation processes to accommodate the new provider structure. - Updated module documentation and variable descriptions for clarity on the new configuration options. --- compute/ecs_cluster/.terraform.lock.hcl | 27 ++- compute/ecs_cluster/data.tf | 16 +- compute/ecs_cluster/locals.tf | 41 ++++- compute/ecs_cluster/module.yml | 34 +--- compute/ecs_cluster/outputs.tf | 12 +- compute/ecs_cluster/provider.tf | 15 ++ compute/ecs_cluster/ravion_domains.tf | 225 +++++++++++++++++++----- compute/ecs_cluster/variables.tf | 27 +-- compute/ecs_cluster/versions.tf | 18 +- compute/ecs_service/.terraform.lock.hcl | 27 ++- compute/ecs_service/data.tf | 14 ++ compute/ecs_service/locals.tf | 19 ++ compute/ecs_service/provider.tf | 9 + compute/ecs_service/ravion_domains.tf | 87 +++++++-- compute/ecs_service/variables.tf | 21 ++- compute/ecs_service/versions.tf | 11 +- 16 files changed, 490 insertions(+), 113 deletions(-) diff --git a/compute/ecs_cluster/.terraform.lock.hcl b/compute/ecs_cluster/.terraform.lock.hcl index 28ffb2f..ef959ee 100644 --- a/compute/ecs_cluster/.terraform.lock.hcl +++ b/compute/ecs_cluster/.terraform.lock.hcl @@ -1,9 +1,34 @@ # This file is maintained automatically by "tofu init". # Manual edits may be lost in future updates. 
+provider "ravion.com/ravion/domains" { + version = "2.0.0" + constraints = ">= 2.0.0" + hashes = [ + "h1:OLB6BBvlsZr1SwIdNtZ07ihAJFmjd/qVX8UzJ8xuZcw=", + ] +} + +provider "registry.opentofu.org/cloudflare/cloudflare" { + version = "5.19.1" + constraints = ">= 4.0.0" + hashes = [ + "h1:HkKPMZ/n+QiExkRUSLjGMTGnuIaph+k932LiTp7CKZM=", + "zh:0651618000db705564dab5a25322b9d76ea54b7dd78931ed3565497b559babeb", + "zh:1a7847e9479fb6d21a65ef933ffae1416b1e4b44ca940c0d6c50fc4248cc4a0d", + "zh:5597cee5854131045eb9f201ae3a70b59c51955d31a647d9616863c746d902cb", + "zh:580786830d93e35b957754fd4c62d4681a3b19abc28b757e41acba26455663b1", + "zh:83c4bdfb0e74fd50e56fff3c461d76c1c1ec61af3f679e4de1aa70b5ed05a09f", + "zh:abb4d1052cee61d80f9cb51e5421e3c118312403afb7104b98bd7e310ac736ee", + "zh:b0aeeb3d66ea4d719989875e778c477065ba941e3a76e9a8caacc3be08208dd9", + "zh:e43b4b2dfcec1ce2115f5a5c86042d432deb49bee8eae103eb56d97ea02e2e3b", + "zh:f809ab383cca0a5f83072981c64208cbd7fa67e986a86ee02dd2c82333221e32", + ] +} + provider "registry.opentofu.org/hashicorp/aws" { version = "6.39.0" - constraints = ">= 5.0.0" + constraints = ">= 6.0.0" hashes = [ "h1:c9SG8ZdYgzqpxORpTqeLFeXW4qQQ8GMGCcUkU+FAfQM=", "zh:00a6c0d8b5b86833087e367b632e9ab73fb8db9c43569020ebd0489dc2c919ce", diff --git a/compute/ecs_cluster/data.tf b/compute/ecs_cluster/data.tf index 9b78bc4..d201bf8 100644 --- a/compute/ecs_cluster/data.tf +++ b/compute/ecs_cluster/data.tf @@ -15,4 +15,18 @@ data "aws_region" "current" {} # Get current AWS account ID data "aws_caller_identity" "current" {} - +# Resolve the registered Ravion DnsProvider that the cluster's +# wildcard FQDN + cert hang off. Accepts an opaque id +# (`ravion_dns_provider_id = "dnsprov_..."`) OR a per-org stable +# `given_id` — the api-go handler does a dual lookup. Exactly one of +# the per-variant attribute groups (`route53_ravion`, `route53`, +# `cloudflare`, `external`) is non-null on the returned row; the count +# gating in ravion_domains.tf dispatches on those. 
+# +# The count = 0 branch (no provider configured) is the BYO-cert path — +# `var.public_alb_certificate_arns` is consumed directly. +data "ravion_dns_provider" "this" { + count = local.dns_provider_lookup_key == "" ? 0 : 1 + id = var.ravion_dns_provider_id != null && var.ravion_dns_provider_id != "" ? var.ravion_dns_provider_id : null + given_id = var.ravion_dns_provider_given_id != null && var.ravion_dns_provider_given_id != "" ? var.ravion_dns_provider_given_id : null +} diff --git a/compute/ecs_cluster/locals.tf b/compute/ecs_cluster/locals.tf index ec2db7b..0026b3a 100644 --- a/compute/ecs_cluster/locals.tf +++ b/compute/ecs_cluster/locals.tf @@ -1,23 +1,56 @@ locals { region = coalesce(var.region, data.aws_region.current.id) + # Either input form (id or given_id) drives the lookup. The data + # source's count is gated on this string being non-empty. try() is + # required: coalesce() errors when every argument is null or "". + dns_provider_lookup_key = try( + coalesce(var.ravion_dns_provider_id, var.ravion_dns_provider_given_id), + "", + ) + + # The resolved DnsProvider row (only present when the data source's + # count == 1). Per-variant attribute groups (`route53_ravion`, + # `route53`, `cloudflare`, `external`) are how the ravion_domains.tf + # blocks dispatch — exactly one is non-null per row. + dns_provider = local.dns_provider_lookup_key != "" ? data.ravion_dns_provider.this[0] : null + # Ravion-managed domains gate. When true the cluster allocates a # wildcard FQDN + issues a wildcard ACM cert in ravion_domains.tf; # service modules under this cluster inherit the wildcard via SNI. # Implicit: setting either provider input + enabling HTTPS implies + # "use Ravion-managed cert"; nothing else picks the path.
enable_ravion_domain = ( var.enable_public_alb && var.public_alb_enable_https && - var.public_alb_cert_source == "ravion_managed" && - var.ravion_dns_zone_id != null && - var.ravion_dns_zone_id != "" + local.dns_provider != null ) + # Per-variant flags — count gating on these decides which writer + # path validation + apex routing records take. Mutually exclusive: + # exactly one is true when enable_ravion_domain is true (except + # EXTERNAL — see note below). try() guards the null (BYO) provider row, since && may evaluate both operands. + is_route53_ravion = local.enable_ravion_domain && try(local.dns_provider.route53_ravion, null) != null + is_route53 = local.enable_ravion_domain && try(local.dns_provider.route53, null) != null + is_cloudflare = local.enable_ravion_domain && try(local.dns_provider.cloudflare, null) != null + # EXTERNAL: the customer brings their own DNS + cert flow entirely. + # Ravion-managed cert is NOT available for this variant — module + # allocates the FQDN row for tracking but skips ACM. The cluster + # must be configured with public_alb_certificate_arns in this mode. + is_external = local.enable_ravion_domain && try(local.dns_provider.external, null) != null + + # ACM cert is issued for variants where Ravion (or the customer's + # TF) can write DNS validation records the cert validation block + # can wait on. EXTERNAL is excluded because we don't have a + # variant-specific writer for arbitrary registrars. + enable_acm_cert = local.is_route53_ravion || local.is_route53 || local.is_cloudflare + # The ALB's HTTPS listener takes a single default cert + N SNI extras. # Ravion-managed mode puts the wildcard first (default); BYO mode uses # the customer's list verbatim. Using the validation resource's output # ensures the listener depends on ACM validation completing. public_alb_effective_certificate_arns = ( - local.enable_ravion_domain + local.enable_acm_cert ?
concat([aws_acm_certificate_validation.cluster[0].certificate_arn], var.public_alb_certificate_arns) : var.public_alb_certificate_arns ) diff --git a/compute/ecs_cluster/module.yml b/compute/ecs_cluster/module.yml index c76fe6d..88e946d 100644 --- a/compute/ecs_cluster/module.yml +++ b/compute/ecs_cluster/module.yml @@ -324,30 +324,14 @@ input: default: false show_when: enable_public_alb: true - - public_alb_cert_source: + - ravion_dns_provider_id: type: string - label: "Certificate Source" - description: "Where the HTTPS listener's default certificate comes from. \"Ravion managed\" allocates a wildcard FQDN under your selected DNS zone and issues a wildcard ACM cert covering services in this cluster. \"Bring your own\" expects you to supply ACM ARNs directly." - default: "ravion_managed" + label: "DNS Provider" + description: "Ravion DnsProvider the cluster's wildcard FQDN lives under. Register providers (Route53, Cloudflare, or other) on the DNS Providers settings page; the variant (cert source, validation path) is implicit in the provider's type. Leave empty to use BYO certificate ARNs instead." show_when: enable_public_alb: true public_alb_enable_https: true - values: - - value: "ravion_managed" - label: "Ravion managed (wildcard cert + FQDN auto-allocated)" - - value: "byo" - label: "Bring your own ACM cert" - - ravion_dns_zone_id: - type: string - label: "DNS Zone" - description: "Ravion DnsZone the cluster's wildcard FQDN lives under. Choose the platform-managed Ravion apex (default) or a customer-owned zone registered on the DNS Zones settings page." 
- show_when: - enable_public_alb: true - public_alb_enable_https: true - public_alb_cert_source: "ravion_managed" - validation: - required: true - values: "$values:ravion/dns-zones" + values: "$values:ravion/dns-providers" - ravion_cluster_slug: type: string label: "FQDN Slug" @@ -355,18 +339,16 @@ input: show_when: enable_public_alb: true public_alb_enable_https: true - public_alb_cert_source: "ravion_managed" + ravion_dns_provider_id: "!=" - public_alb_certificate_arns: type: list - label: "ACM Certificate ARNs" - description: "ACM certificate ARNs for HTTPS. The first ARN is the default certificate; additional ARNs are attached for SNI" + label: "ACM Certificate ARNs (BYO)" + description: "ACM certificate ARNs for HTTPS. Used directly when no DNS provider is selected, or appended after the Ravion-managed wildcard cert as additional SNI certificates. The first ARN is the listener's default in BYO mode." + default: [] show_when: enable_public_alb: true public_alb_enable_https: true - public_alb_cert_source: "byo" validation: - required: true - min_length: 1 patterns: - pattern: "^arn:aws:acm:[a-z0-9-]+:[0-9]+:certificate/[a-z0-9-]+$" message: "Must be a valid ACM certificate ARN" diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index c157a6d..dcb5fe7 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -253,9 +253,9 @@ output "ravion_managed_domains_enabled" { value = local.enable_ravion_domain } -output "ravion_dns_zone_id" { - description = "DnsZone id the cluster's wildcard lives under. Passes through to service modules so they allocate under the same zone." - value = local.enable_ravion_domain ? var.ravion_dns_zone_id : null +output "ravion_dns_provider_id" { + description = "DnsProvider id the cluster's wildcard lives under. 
Passes through to service modules so they allocate under the same provider — preserves the same dns_provider for all FQDNs in the cluster regardless of which variant (Route53/Cloudflare/etc.) is in use." + value = local.enable_ravion_domain ? local.dns_provider.id : null } output "ravion_cluster_domain_allocation_id" { @@ -269,11 +269,11 @@ output "ravion_cluster_managed_domain_id" { } output "ravion_cluster_fqdn" { - description = "Cluster wildcard FQDN, e.g. `*.cluster-abc.ravion.app`." + description = "Cluster wildcard FQDN, e.g. `*.cluster-abc.acme.com`." value = local.enable_ravion_domain ? ravion_domain.cluster[0].fqdn : null } output "ravion_cluster_certificate_arn" { - description = "ACM ARN of the cluster's wildcard cert. Use as the listener's default cert or as an extra cert via aws_lb_listener_certificate." - value = local.enable_ravion_domain ? aws_acm_certificate_validation.cluster[0].certificate_arn : null + description = "ACM ARN of the cluster's wildcard cert. Use as the listener's default cert or as an extra cert via aws_lb_listener_certificate. Null when the cluster's DnsProvider is EXTERNAL (no Ravion-managed cert)." + value = local.enable_acm_cert ? aws_acm_certificate_validation.cluster[0].certificate_arn : null } diff --git a/compute/ecs_cluster/provider.tf b/compute/ecs_cluster/provider.tf index dc58d9a..781fe53 100644 --- a/compute/ecs_cluster/provider.tf +++ b/compute/ecs_cluster/provider.tf @@ -1,3 +1,18 @@ provider "aws" { region = var.region } + +# Cloudflare provider — used only when the registered DnsProvider is +# CLOUDFLARE (count gating on `data.ravion_dns_provider.this[0].cloudflare` +# in ravion_domains.tf decides whether any `cloudflare_record` resources +# are actually planned). The api_token attribute is the plaintext +# token Ravion's data source dereferences from WorkOS Vault +# server-side — the token never lands in HCL or TF state in +# unencrypted form because the schema marks it Sensitive. 
+# +# When the DnsProvider is anything other than CLOUDFLARE the data +# source's cloudflare attribute is null; the provider config still +# evaluates but no `cloudflare_record` resources reference it (count = 0). +provider "cloudflare" { + api_token = try(data.ravion_dns_provider.this[0].cloudflare.api_token, null) +} diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 8474e44..5d8186f 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -1,40 +1,43 @@ ################################################################################ -# Ravion domain control plane — cluster wildcard +# Ravion domain control plane — cluster wildcard (V2) # -# Allocates `*.` under Ravion's apex (e.g. `*.-.ravion.app`) -# and issues a wildcard ACM cert covering it. Service modules under this -# cluster create child allocations whose FQDNs sit under , -# so they inherit the wildcard cert via SNI without their own ACM work. +# Allocates `*.` under the registered DnsProvider's apex +# and issues a wildcard ACM cert covering it. Service modules under +# this cluster create child allocations whose FQDNs sit under +# , so they inherit the wildcard cert via SNI without +# their own ACM work. # -# Resources (per the DI design in -# packages/shared-go/domain/domains/DOMAIN_CONTROL_PLANE_DI_DESIGN.md): +# Variant dispatch (count = local.is_X ? 1 : 0): +# ROUTE53_RAVION → Ravion's own Route53. RavionRoute53Writer issues +# the ChangeResourceRecordSets call inline. +# ROUTE53 → Customer's Route53. Customer's `aws_route53_record` +# in their AWS account writes the record; Ravion +# persists metadata via `ravion_dns_records` after- +# the-fact (depends_on). +# CLOUDFLARE → Customer's Cloudflare zone. `cloudflare_dns_record` +# writes the record using the api_token sourced +# from WorkOS Vault via the data source; Ravion +# metadata after-the-fact. +# EXTERNAL → Skipped — module assumes BYO cert in this mode. 
# -# ravion_domain.cluster — allocates the wildcard FQDN -# aws_acm_certificate.cluster — issues the cert (customer's AWS account) -# ravion_dns_records.cluster_* — writes the validation + apex routing -# records into Ravion's Route53 (the -# api-go's RavionRoute53Writer) -# aws_acm_certificate_validation — blocks ~30s until ACM verifies -# ravion_managed_certificate.cluster — registers cert metadata at Ravion -# for the UI badge -# -# All AWS resources live in the customer's account, applied by their TF -# runner with their IAM. Ravion never holds customer credentials. +# All AWS / Cloudflare resources live in the customer's accounts, +# applied by their TF runner with their IAM. Ravion never holds +# customer credentials. ################################################################################ -# 1. Allocate the cluster's wildcard FQDN. -# The local.enable_ravion_domain gate lives in locals.tf next to the -# ALB-cert-source toggle since both branches need to agree. +# ---- 1. Allocate the cluster's wildcard FQDN ------------------------------- +# Single ravion_domain resource regardless of variant — the API knows +# which provider it lives under from dns_provider_id. resource "ravion_domain" "cluster" { - count = local.enable_ravion_domain ? 1 : 0 - dns_zone_id = var.ravion_dns_zone_id - slug = coalesce(var.ravion_cluster_slug, var.name) - wildcard = true + count = local.enable_ravion_domain ? 1 : 0 + dns_provider_id = local.dns_provider.id + slug = coalesce(var.ravion_cluster_slug, var.name) + wildcard = true } -# 2. ACM wildcard cert. Lives in the customer's AWS account. +# ---- 2. ACM wildcard cert (skipped for EXTERNAL) --------------------------- resource "aws_acm_certificate" "cluster" { - count = local.enable_ravion_domain ? 1 : 0 + count = local.enable_acm_cert ? 1 : 0 domain_name = ravion_domain.cluster[0].fqdn validation_method = "DNS" @@ -46,11 +49,12 @@ resource "aws_acm_certificate" "cluster" { tags = var.tags } -# 3. 
Validation CNAME(s) into Ravion's Route53. Synchronous — the -# RavionRoute53Writer issues a Route53 ChangeResourceRecordSets call -# inline with our POST and returns when AWS accepts the change. -resource "ravion_dns_records" "cluster_validation" { - count = local.enable_ravion_domain ? 1 : 0 +# ---- 3a. ROUTE53_RAVION validation records --------------------------------- +# Synchronous — the RavionRoute53Writer issues a Route53 +# ChangeResourceRecordSets call inline with our POST and returns when +# AWS accepts the change. No customer-side resources needed. +resource "ravion_dns_records" "cluster_validation_ravion" { + count = local.is_route53_ravion ? 1 : 0 managed_domain_id = ravion_domain.cluster[0].id records = [ for opt in aws_acm_certificate.cluster[0].domain_validation_options : { @@ -62,10 +66,104 @@ resource "ravion_dns_records" "cluster_validation" { ] } -# 4. Apex routing record — wildcard FQDN points at the cluster's public ALB. -# Uses ALIAS so apex-style routing works (Route53 expands to A + AliasTarget). -resource "ravion_dns_records" "cluster_routing" { - count = local.enable_ravion_domain && var.enable_public_alb ? 1 : 0 +# ---- 3b. ROUTE53 (customer-owned) validation records ----------------------- +# Customer's AWS account, customer's IAM. The for_each fan-out per +# validation option is the customer's actual write; the +# `ravion_dns_records.cluster_validation_metadata_r53` block below +# depends_on this so Ravion's metadata row lands after the record is +# live. +resource "aws_route53_record" "cluster_validation_r53" { + for_each = local.is_route53 ? 
{ + for opt in aws_acm_certificate.cluster[0].domain_validation_options : opt.domain_name => opt + } : {} + + zone_id = local.dns_provider.route53.hosted_zone_id + name = each.value.resource_record_name + type = each.value.resource_record_type + records = [each.value.resource_record_value] + ttl = 60 +} + +resource "ravion_dns_records" "cluster_validation_metadata_r53" { + count = local.is_route53 ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [ + for opt in aws_acm_certificate.cluster[0].domain_validation_options : { + name = opt.resource_record_name + type = opt.resource_record_type + value = opt.resource_record_value + ttl = 60 + } + ] + depends_on = [aws_route53_record.cluster_validation_r53] +} + +# ---- 3c. CLOUDFLARE validation records ------------------------------------- +# Customer's Cloudflare zone. The cloudflare provider's api_token is +# resolved from WorkOS Vault by data.ravion_dns_provider — see +# provider.tf for the provider block. +resource "cloudflare_dns_record" "cluster_validation_cf" { + for_each = local.is_cloudflare ? { + for opt in aws_acm_certificate.cluster[0].domain_validation_options : opt.domain_name => opt + } : {} + + zone_id = local.dns_provider.cloudflare.zone_id + name = trimsuffix(each.value.resource_record_name, ".") + type = each.value.resource_record_type + content = trimsuffix(each.value.resource_record_value, ".") + ttl = 60 + proxied = false +} + +resource "ravion_dns_records" "cluster_validation_metadata_cf" { + count = local.is_cloudflare ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [ + for opt in aws_acm_certificate.cluster[0].domain_validation_options : { + name = opt.resource_record_name + type = opt.resource_record_type + value = opt.resource_record_value + ttl = 60 + } + ] + depends_on = [cloudflare_dns_record.cluster_validation_cf] +} + +# ---- 4a. ROUTE53_RAVION apex routing record ------------------------------- +# Wildcard FQDN points at the cluster's public ALB. 
ALIAS works because +# Route53 supports apex-style routing. +resource "ravion_dns_records" "cluster_routing_ravion" { + count = local.is_route53_ravion && var.enable_public_alb ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [{ + name = ravion_domain.cluster[0].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = module.public_alb[0].alb_dns_name + zone_id = module.public_alb[0].alb_zone_id + }) + }] +} + +# ---- 4b. ROUTE53 (customer) apex routing record --------------------------- +# AWS A-record alias targeting the cluster's ALB. Customer's IAM +# writes it; Ravion records metadata. +resource "aws_route53_record" "cluster_routing_r53" { + count = local.is_route53 && var.enable_public_alb ? 1 : 0 + + zone_id = local.dns_provider.route53.hosted_zone_id + name = ravion_domain.cluster[0].fqdn + type = "A" + + alias { + name = module.public_alb[0].alb_dns_name + zone_id = module.public_alb[0].alb_zone_id + evaluate_target_health = true + } +} + +resource "ravion_dns_records" "cluster_routing_metadata_r53" { + count = local.is_route53 && var.enable_public_alb ? 1 : 0 managed_domain_id = ravion_domain.cluster[0].id records = [{ name = ravion_domain.cluster[0].fqdn @@ -75,23 +173,60 @@ resource "ravion_dns_records" "cluster_routing" { zone_id = module.public_alb[0].alb_zone_id }) }] + depends_on = [aws_route53_record.cluster_routing_r53] +} + +# ---- 4c. CLOUDFLARE apex routing record ----------------------------------- +# Cloudflare doesn't do AWS ALIAS records. A CNAME at the wildcard +# apex pointing at the ALB DNS name is functionally equivalent here +# (the cluster FQDN is `-.`, not the apex itself — +# CNAMEs at non-apex labels are allowed). +resource "cloudflare_dns_record" "cluster_routing_cf" { + count = local.is_cloudflare && var.enable_public_alb ? 
1 : 0 + + zone_id = local.dns_provider.cloudflare.zone_id + name = ravion_domain.cluster[0].fqdn + type = "CNAME" + content = module.public_alb[0].alb_dns_name + ttl = 60 # Explicit 60s TTL, matching the metadata record below; ttl = 1 ("automatic") is only required when proxied = true, and proxied stays false because the ALB doesn't accept CF proxying for arbitrary L7. + proxied = false +} + +resource "ravion_dns_records" "cluster_routing_metadata_cf" { + count = local.is_cloudflare && var.enable_public_alb ? 1 : 0 + managed_domain_id = ravion_domain.cluster[0].id + records = [{ + name = ravion_domain.cluster[0].fqdn + type = "CNAME" + value = module.public_alb[0].alb_dns_name + ttl = 60 + }] + depends_on = [cloudflare_dns_record.cluster_routing_cf] } -# 5. Block until ACM has validated the cert. With Ravion's Route53 zone -# under our IAM, the validation CNAME goes live in seconds — this step -# typically completes in well under 60s. +# ---- 5. Block until ACM has validated the cert ---------------------------- +# Pulls validation_record_fqdns from whichever ravion_dns_records +# branch fired. Only one is non-null in any given plan; the +# alternative branches return empty lists. concat() flattens. resource "aws_acm_certificate_validation" "cluster" { - count = local.enable_ravion_domain ? 1 : 0 - certificate_arn = aws_acm_certificate.cluster[0].arn - validation_record_fqdns = ravion_dns_records.cluster_validation[0].fqdns + count = local.enable_acm_cert ? 1 : 0 + certificate_arn = aws_acm_certificate.cluster[0].arn + validation_record_fqdns = concat( + local.is_route53_ravion ? ravion_dns_records.cluster_validation_ravion[0].fqdns : [], + local.is_route53 ? ravion_dns_records.cluster_validation_metadata_r53[0].fqdns : [], + local.is_cloudflare ? ravion_dns_records.cluster_validation_metadata_cf[0].fqdns : [], + ) } -# 6. Tell Ravion about the cert so the UI shows the cert badge on the -# cluster's domain row. +# ---- 6.
Register cert metadata at Ravion ---------------------------------- +# Same for every variant — the UI cares about cert status, not where +# the validation records live. resource "ravion_managed_certificate" "cluster" { - count = local.enable_ravion_domain ? 1 : 0 + count = local.enable_acm_cert ? 1 : 0 cert_arn = aws_acm_certificate_validation.cluster[0].certificate_arn status = "ISSUED" scope = "CLUSTER_WILDCARD" managed_domain_ids = [ravion_domain.cluster[0].managed_domain_id] + issued_at = aws_acm_certificate.cluster[0].not_before + expires_at = aws_acm_certificate.cluster[0].not_after } diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index 2ff616f..888d94e 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -632,22 +632,29 @@ variable "region" { ################################################################################ # Ravion domain control plane +# +# V2: cert source is implicit — when EITHER ravion_dns_provider_id OR +# ravion_dns_provider_given_id is set AND the public ALB has HTTPS +# enabled, the module allocates a wildcard FQDN under that provider +# and issues a wildcard ACM cert. Otherwise BYO mode kicks in and +# var.public_alb_certificate_arns is consumed directly. +# +# Variant dispatch happens in ravion_domains.tf via the +# `data.ravion_dns_provider` data source — per-variant attribute +# groups (`route53_ravion`, `route53`, `cloudflare`, `external`) +# decide which writer path the validation + apex routing records +# take. Enum strings never appear in this module's HCL. ################################################################################ -variable "public_alb_cert_source" { +variable "ravion_dns_provider_id" { type = string - description = "Where the public ALB's default HTTPS certificate comes from. 
`ravion_managed` allocates a wildcard FQDN under ravion_dns_zone_id and issues a wildcard ACM cert that the listener uses as its default; service modules under this cluster inherit the wildcard via SNI. `byo` expects public_alb_certificate_arns instead." - default = "ravion_managed" - - validation { - condition = contains(["ravion_managed", "byo"], var.public_alb_cert_source) - error_message = "public_alb_cert_source must be one of: ravion_managed, byo." - } + description = "Opaque Ravion DnsProvider id (`dnsprov_*`) the cluster's wildcard allocation lives under. Provide EITHER this or ravion_dns_provider_given_id; if both are set, this wins. Leave both null to opt out of Ravion-managed certs and supply public_alb_certificate_arns directly." + default = null } -variable "ravion_dns_zone_id" { +variable "ravion_dns_provider_given_id" { type = string - description = "Ravion DnsZone id (dzn_*) the cluster's wildcard allocation lives under. Required when public_alb_cert_source = \"ravion_managed\". Pick the platform-owned Ravion apex zone or a customer-owned zone registered on the DNS Zones settings page." + description = "Per-org stable identifier for the Ravion DnsProvider — same dual-lookup as ravion_dns_provider_id. Module HCL prefers this form so cluster definitions stay portable across orgs that share the same provider naming." default = null } diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index 33c6394..627d35e 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -12,9 +12,25 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + # Bumped to v2 — the V2 provider drops the `ravion_dns_zone_id` + # field name in favor of `ravion_dns_provider_id`, and exposes + # the new `data.ravion_dns_provider` discriminated data source + # that this module's per-variant HCL gates on. 
ravion = { source = "ravion.com/ravion/domains" - version = ">= 1.0.0" + version = ">= 2.0.0" + } + # Cloudflare provider is needed when the registered DnsProvider + # is CLOUDFLARE — the customer's TF writes ACM validation + + # apex routing records via `cloudflare_dns_record`, and Ravion + # records them after-the-fact via `ravion_dns_records` for the + # UI. Provider config below reads `data.ravion_dns_provider`'s + # cloudflare attribute group; api_token is sourced from + # WorkOS Vault server-side and returned to the runner as a + # sensitive computed attribute. v5+ is required: `cloudflare_dns_record` does not exist in v4. + cloudflare = { + source = "cloudflare/cloudflare" + version = ">= 5.0" } } } diff --git a/compute/ecs_service/.terraform.lock.hcl b/compute/ecs_service/.terraform.lock.hcl index 28ffb2f..ef959ee 100644 --- a/compute/ecs_service/.terraform.lock.hcl +++ b/compute/ecs_service/.terraform.lock.hcl @@ -1,9 +1,34 @@ # This file is maintained automatically by "tofu init". # Manual edits may be lost in future updates. +provider "ravion.com/ravion/domains" { + version = "2.0.0" + constraints = ">= 2.0.0" + hashes = [ + "h1:OLB6BBvlsZr1SwIdNtZ07ihAJFmjd/qVX8UzJ8xuZcw=", + ] +} + +provider "registry.opentofu.org/cloudflare/cloudflare" { + version = "5.19.1" + constraints = ">= 4.0.0" + hashes = [ + "h1:HkKPMZ/n+QiExkRUSLjGMTGnuIaph+k932LiTp7CKZM=", + "zh:0651618000db705564dab5a25322b9d76ea54b7dd78931ed3565497b559babeb", + "zh:1a7847e9479fb6d21a65ef933ffae1416b1e4b44ca940c0d6c50fc4248cc4a0d", + "zh:5597cee5854131045eb9f201ae3a70b59c51955d31a647d9616863c746d902cb", + "zh:580786830d93e35b957754fd4c62d4681a3b19abc28b757e41acba26455663b1", + "zh:83c4bdfb0e74fd50e56fff3c461d76c1c1ec61af3f679e4de1aa70b5ed05a09f", + "zh:abb4d1052cee61d80f9cb51e5421e3c118312403afb7104b98bd7e310ac736ee", + "zh:b0aeeb3d66ea4d719989875e778c477065ba941e3a76e9a8caacc3be08208dd9", + "zh:e43b4b2dfcec1ce2115f5a5c86042d432deb49bee8eae103eb56d97ea02e2e3b", + "zh:f809ab383cca0a5f83072981c64208cbd7fa67e986a86ee02dd2c82333221e32", + ] +} + provider 
"registry.opentofu.org/hashicorp/aws" { version = "6.39.0" - constraints = ">= 5.0.0" + constraints = ">= 6.0.0" hashes = [ "h1:c9SG8ZdYgzqpxORpTqeLFeXW4qQQ8GMGCcUkU+FAfQM=", "zh:00a6c0d8b5b86833087e367b632e9ab73fb8db9c43569020ebd0489dc2c919ce", diff --git a/compute/ecs_service/data.tf b/compute/ecs_service/data.tf index 7d2d538..9f4771c 100644 --- a/compute/ecs_service/data.tf +++ b/compute/ecs_service/data.tf @@ -12,4 +12,18 @@ data "aws_vpc" "this" { id = var.vpc_id } +# Resolve the registered Ravion DnsProvider — same shape as the +# cluster module's data block. Per-variant attribute groups +# (`route53_ravion`, `route53`, `cloudflare`, `external`) drive the +# count gating in ravion_domains.tf. Skipped (count = 0) when no +# provider is configured at the service level, which is the common +# case — services typically inherit the cluster's wildcard via the +# parent_domain_allocation_id link and don't need to dispatch on +# variant themselves except for the routing CNAME write path. +data "ravion_dns_provider" "this" { + count = local.dns_provider_lookup_key == "" ? 0 : 1 + id = var.ravion_dns_provider_id != null && var.ravion_dns_provider_id != "" ? var.ravion_dns_provider_id : null + given_id = var.ravion_dns_provider_given_id != null && var.ravion_dns_provider_given_id != "" ? var.ravion_dns_provider_given_id : null +} + diff --git a/compute/ecs_service/locals.tf b/compute/ecs_service/locals.tf index 13e309c..a3a1d45 100644 --- a/compute/ecs_service/locals.tf +++ b/compute/ecs_service/locals.tf @@ -1,5 +1,24 @@ locals { region = coalesce(var.region, data.aws_region.current.id) + + # Either input form (id or given_id) drives the lookup. The data + # source's count is gated on this string being non-empty. try() is + # required: coalesce() errors when every argument is null or "". + dns_provider_lookup_key = try( + coalesce(var.ravion_dns_provider_id, var.ravion_dns_provider_given_id), + "", + ) + + # The resolved DnsProvider row (only present when the data source's + # count == 1).
Per-variant attribute groups (`route53_ravion`, + # `route53`, `cloudflare`, `external`) drive the routing-record + # write path dispatch in ravion_domains.tf. + dns_provider = local.dns_provider_lookup_key != "" ? data.ravion_dns_provider.this[0] : null + + # Per-variant flags. Mutually exclusive when set. try() yields false for the null (no provider) row instead of dereferencing it. + is_route53_ravion = try(local.dns_provider.route53_ravion, null) != null + is_route53 = try(local.dns_provider.route53, null) != null + is_cloudflare = try(local.dns_provider.cloudflare, null) != null } ################################################################################ diff --git a/compute/ecs_service/provider.tf b/compute/ecs_service/provider.tf index dc58d9a..d5ee066 100644 --- a/compute/ecs_service/provider.tf +++ b/compute/ecs_service/provider.tf @@ -1,3 +1,12 @@ provider "aws" { region = var.region } + +# Cloudflare provider — used only when the parent cluster's +# DnsProvider is CLOUDFLARE (count gating on +# `data.ravion_dns_provider.this[0].cloudflare` in ravion_domains.tf). +# Same api_token resolution path as the cluster module: WorkOS Vault +# deref via Ravion's data source.
+provider "cloudflare" { + api_token = try(data.ravion_dns_provider.this[0].cloudflare.api_token, null) +} diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 3000df9..5948e11 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -1,20 +1,25 @@ ################################################################################ -# Ravion domain control plane — per-service domain allocations +# Ravion domain control plane — per-service domain allocations (V2) # # When the parent cluster module is configured for Ravion-managed # domains (`module.ecs_cluster.ravion_managed_domains_enabled`), each # entry in var.ravion_domains gets: # -# ravion_domain.this[d] — child allocation under the cluster -# ravion_dns_records.this[d] — CNAME pointing the FQDN at the ALB -# aws_lb_listener_rule.ravion[d] — host-header rule on the cluster's -# HTTPS listener +# ravion_domain.this[d] — child allocation under +# the cluster +# _record.routing_ — actual CNAME write +# (Route53 / Cloudflare) +# ravion_dns_records.this[d] — metadata-only sibling +# (depends_on the real +# record write) +# aws_lb_listener_rule.ravion[d] — host-header rule on the +# cluster's HTTPS listener # # No per-domain ACM cert — the cluster's wildcard covers every FQDN # allocated under it via SNI. No ravion_managed_certificate either — # the cluster's ravion_managed_certificate already advertises the cert. # -# Empty var.ravion_domains = the service is reachable via the cluster's +# Empty var.ravion_domains = service is reachable via the cluster's # apex wildcard only; nothing is allocated here. 
################################################################################ @@ -49,14 +54,15 @@ locals { resource "ravion_domain" "this" { for_each = local.ravion_domain_set - dns_zone_id = var.ravion_dns_zone_id + dns_provider_id = try(local.dns_provider.id, var.ravion_dns_provider_id) # resolved id also covers the given_id-only input form slug = each.value parent_domain_allocation_id = var.ravion_parent_domain_allocation_id } -# 2. Routing CNAME — each FQDN points at the cluster's public ALB. -resource "ravion_dns_records" "this" { - for_each = local.ravion_domain_set +# ---- 2a. ROUTE53_RAVION routing records ---------------------------------- +# Ravion's own Route53 — RavionRoute53Writer writes the ALIAS inline. +resource "ravion_dns_records" "ravion" { + for_each = local.is_route53_ravion ? local.ravion_domain_set : toset([]) managed_domain_id = ravion_domain.this[each.value].id records = [{ @@ -69,10 +75,69 @@ resource "ravion_dns_records" "this" { }] } +# ---- 2b. ROUTE53 (customer) routing records ------------------------------ +resource "aws_route53_record" "this_r53" { + for_each = local.is_route53 ? local.ravion_domain_set : toset([]) + + zone_id = local.dns_provider.route53.hosted_zone_id + name = ravion_domain.this[each.value].fqdn + type = "A" + + alias { + name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + evaluate_target_health = true + } +} + +resource "ravion_dns_records" "metadata_r53" { + for_each = local.is_route53 ? local.ravion_domain_set : toset([]) + + managed_domain_id = ravion_domain.this[each.value].id + records = [{ + name = ravion_domain.this[each.value].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] + depends_on = [aws_route53_record.this_r53] +} + +# ---- 2c.
CLOUDFLARE routing records -------------------------------------- +# Cloudflare doesn't do AWS ALIAS records — CNAME at the child FQDN +# pointing at the cluster's ALB is functionally equivalent (each +# service FQDN is a non-apex label under the cluster's wildcard). +resource "cloudflare_dns_record" "this_cf" { + for_each = local.is_cloudflare ? local.ravion_domain_set : toset([]) + + zone_id = local.dns_provider.cloudflare.zone_id + name = ravion_domain.this[each.value].fqdn + type = "CNAME" + content = var.ravion_cluster_alb_dns_name + ttl = 60 + proxied = false +} + +resource "ravion_dns_records" "metadata_cf" { + for_each = local.is_cloudflare ? local.ravion_domain_set : toset([]) + + managed_domain_id = ravion_domain.this[each.value].id + records = [{ + name = ravion_domain.this[each.value].fqdn + type = "CNAME" + value = var.ravion_cluster_alb_dns_name + ttl = 60 + }] + depends_on = [cloudflare_dns_record.this_cf] +} + # 3. Listener rule — host-header match routes each FQDN to this service's # target group on the cluster's HTTPS listener. The cluster cert covers # `*.` so SNI handshake succeeds without an explicit -# aws_lb_listener_certificate attachment. +# aws_lb_listener_certificate attachment. Same rule per variant — +# host-header matching is variant-agnostic. resource "aws_lb_listener_rule" "ravion" { for_each = local.ravion_has_listener ? local.ravion_domain_set : toset([]) diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 8b42df7..a7c9879 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -603,15 +603,24 @@ variable "region" { ################################################################################ # Ravion domain control plane # -# When the parent cluster module is configured with ravion_dns_zone_id, -# pass its outputs into the service module via these variables to -# allocate a child FQDN that inherits the cluster's wildcard cert via -# SNI. 
Set ravion_parent_domain_allocation_id = null/empty to opt out. +# V2: pass the parent cluster's outputs in via ravion_dns_provider_id +# (`module.ecs_cluster.ravion_dns_provider_id`) and the existing +# ravion_parent_domain_allocation_id, ravion_cluster_alb_*, and +# ravion_cluster_https_listener_arn knobs. The data source in data.tf +# resolves the provider's discriminated config so the routing-record +# write path (Route53 vs Cloudflare vs metadata-only) picks the right +# variant. ################################################################################ -variable "ravion_dns_zone_id" { +variable "ravion_dns_provider_id" { type = string - description = "Ravion DnsZone id (dzn_*) the allocation lives under. Same value as the cluster's ravion_dns_zone_id." + description = "Opaque Ravion DnsProvider id (`dnsprov_*`) the service's child allocation lives under. Same value as the cluster's `ravion_dns_provider_id` output. Provide EITHER this or ravion_dns_provider_given_id; if both are set, this wins." + default = null +} + +variable "ravion_dns_provider_given_id" { + type = string + description = "Per-org stable identifier for the Ravion DnsProvider — same dual-lookup as ravion_dns_provider_id. Use this when modules reference providers by stable name across orgs." default = null } diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index 33c6394..6f390ac 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -12,9 +12,18 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + # Bumped to v2 — see compute/ecs_cluster/versions.tf for rationale. ravion = { source = "ravion.com/ravion/domains" - version = ">= 1.0.0" + version = ">= 2.0.0" + } + # Cloudflare provider for per-service CNAMEs when the parent + # cluster's DnsProvider is CLOUDFLARE. Count-gated in + # ravion_domains.tf — no cloudflare_record resources plan when + # the provider variant is different. 
+    cloudflare = {
+      source  = "cloudflare/cloudflare"
+      version = ">= 5.0" # cloudflare_dns_record (used in ravion_domains.tf) only exists in provider v5+; v4 ships cloudflare_record
     }
   }
 }
From ac13e1fd5c65265c3e40451a42d4365028ed1060 Mon Sep 17 00:00:00 2001
From: Siddharth Suresh
Date: Thu, 21 May 2026 02:48:28 +0530
Subject: [PATCH 08/37] feat(ecs_service): per-service cert groups (additive to cluster wildcard)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cluster wildcard cert covers every FQDN under the cluster apex via
SNI, but services that need FQDNs outside that apex (multi-zone setups,
vanity hostnames) had no way to get their own cert. Adds opt-in
certificate groups.

variables.tf:
  Adds var.ravion_certificate_groups — list of objects carrying:
    name                  — stable per-service identifier
    dns_provider_id (opt) — per-group provider override, falls back
    dns_provider_given_id — to the service-level provider when null
    domains               — up to 10 slugs (ACM default SAN limit;
                            validated; per-domain length + charset)
  Group names + domain slugs are validated; duplicate group names and
  groups with > 10 domains are rejected at plan time.

data.tf:
  Per-group ravion_dns_provider lookup so each group can target a
  different zone than the cluster. Falls back to the service-level
  provider when the group's overrides are null.

ravion_domains.tf:
  One ACM cert per group (primary + SANs derived from
  ravion_domain[group/slug].fqdn). Per-variant validation writers
  (route53_ravion / route53 / cloudflare) keyed by precomputed
  per-variant subsets of the (group, domain) pair map. ONE
  ravion_managed_certificate registration per group, scoped SERVICE.
  Per-domain routing record + aws_lb_listener_rule with deterministic
  priorities seeded "g:" to avoid collision with the existing ungrouped
  ravion_domains priority space.

The cluster's HTTPS listener picks up each group's cert via
aws_lb_listener_certificate; SNI handshake selects the most specific
match at request time.
---
 compute/ecs_service/data.tf           |  23 ++
 compute/ecs_service/ravion_domains.tf | 340 ++++++++++++++++++++++++++
 compute/ecs_service/variables.tf      |  46 ++++
 3 files changed, 409 insertions(+)

diff --git a/compute/ecs_service/data.tf b/compute/ecs_service/data.tf
index 9f4771c..9ff6e54 100644
--- a/compute/ecs_service/data.tf
+++ b/compute/ecs_service/data.tf
@@ -26,4 +26,27 @@ data "ravion_dns_provider" "this" {
   given_id = var.ravion_dns_provider_given_id != null && var.ravion_dns_provider_given_id != "" ? var.ravion_dns_provider_given_id : null
 }
 
+# Per-cert-group DnsProvider lookups. Each group can target a different
+# provider than the service's top-level one (multi-zone setups, e.g.
+# acme.com on Cloudflare + app.acme.com on Route53). Falls back to the
+# service-level provider when the group doesn't specify its own.
+data "ravion_dns_provider" "groups" {
+  for_each = { for g in var.ravion_certificate_groups : g.name => g }
+
+  # coalesce() errors (it never yields "") when every argument is null
+  # or empty, hence try(..., null). given_id applies only when no id is set.
+  id = try(
+    coalesce(each.value.dns_provider_id, var.ravion_dns_provider_id),
+    null,
+  )
+
+  given_id = try(
+    coalesce(each.value.dns_provider_id, var.ravion_dns_provider_id),
+    null,
+  ) != null ? null : try(
+    coalesce(each.value.dns_provider_given_id, var.ravion_dns_provider_given_id),
+    null,
+  )
+}
+
+
diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf
index 5948e11..2284a83 100644
--- a/compute/ecs_service/ravion_domains.tf
+++ b/compute/ecs_service/ravion_domains.tf
@@ -161,3 +161,343 @@ resource "aws_lb_listener_rule" "ravion" {
 
   tags = var.tags
 }
+
+################################################################################
+# Per-service certificate groups
+#
+# Independent of the cluster wildcard. Each group:
+#   * Allocates ONE ravion_domain per domain slug under the group's
+#     resolved DnsProvider (apex derived server-side from the provider).
+#   * Issues ONE ACM cert covering all the group's FQDNs (primary +
+#     SANs).
Capped at 10 names per group (ACM default; raise the AWS +# quota first if you need more). +# * Writes per-domain validation records via the group's variant +# (route53_ravion / route53 / cloudflare). Customer-owned variants +# write the actual records; Ravion records metadata after-the-fact. +# * Attaches the cert to the cluster's HTTPS listener via +# aws_lb_listener_certificate so SNI handshake finds it. +# * Adds host-header listener rules routing each FQDN to this +# service's target group. +# +# Groups are ADDITIVE to var.ravion_domains — ungrouped slugs still +# inherit the cluster wildcard via SNI as before. +################################################################################ + +locals { + # Flatten (group, domain) into a single map for nested for_each + # against per-domain resources. The key is "/" so two + # groups can use the same slug without collision. + group_domain_pairs = merge([ + for g in var.ravion_certificate_groups : { + for d in g.domains : "${g.name}/${d}" => { + group_name = g.name + slug = d + } + } + ]...) + + # Per-group resolved provider record, indexed by group name. Used to + # dispatch the per-variant validation + routing writes. + group_providers = { + for g in var.ravion_certificate_groups : + g.name => data.ravion_dns_provider.groups[g.name] + } + + # Per-(group, domain) deterministic listener-rule priority. Layered + # on top of the existing ravion_priority_for_domain offset so group + # rules and ungrouped-domain rules don't collide. Uses a different + # hash seed ("g:") to avoid accidental overlap when the same slug + # appears in both var.ravion_domains and a group. + group_priority_for_pair = { + for k, pair in local.group_domain_pairs : + k => (parseint(substr(sha256("g:${var.name}:${pair.group_name}:${pair.slug}"), 0, 4), 16) % 49000) + 1000 + } +} + +# 1. Per-domain allocations under the group's provider. 
+resource "ravion_domain" "group" {
+  for_each = local.group_domain_pairs
+
+  dns_provider_id = local.group_providers[each.value.group_name].id
+  slug            = each.value.slug
+}
+
+# 2. ONE ACM cert per group (primary + SANs). Customer's AWS account,
+#    applied by their TF runner. The first FQDN is the primary name;
+#    the rest become subject_alternative_names.
+resource "aws_acm_certificate" "group" {
+  for_each = { for g in var.ravion_certificate_groups : g.name => g }
+
+  domain_name = ravion_domain.group["${each.key}/${each.value.domains[0]}"].fqdn
+  subject_alternative_names = [
+    for d in slice(each.value.domains, 1, length(each.value.domains)) :
+    ravion_domain.group["${each.key}/${d}"].fqdn
+  ]
+  validation_method = "DNS"
+
+  lifecycle {
+    create_before_destroy = true
+  }
+
+  tags = merge(var.tags, {
+    "ravion:cert_group" = each.key
+  })
+}
+
+# Helper local: per-(group, domain) validation options, keyed exactly
+# like group_domain_pairs ("<group>/<slug>") so keys are known at plan
+# time and domain_key indexes ravion_domain.group directly.
+locals {
+  group_validation_pairs = {
+    for k, pair in local.group_domain_pairs : k => {
+      group_name = pair.group_name
+      domain_key = k
+      opt = one([
+        for o in aws_acm_certificate.group[pair.group_name].domain_validation_options :
+        o if o.domain_name == ravion_domain.group[k].fqdn
+      ])
+      provider = local.group_providers[pair.group_name]
+    }
+  }
+
+  # Per-variant subsets used as for_each. The filters read only the
+  # group's resolved provider, never the (possibly unknown) ACM
+  # validation option, so the key sets do not depend on ACM output.
+  group_validation_pairs_route53_ravion = {
+    for k, v in local.group_validation_pairs : k => v
+    if v.provider.route53_ravion != null
+  }
+  group_validation_pairs_route53 = {
+    for k, v in local.group_validation_pairs : k => v
+    if v.provider.route53 != null
+  }
+  group_validation_pairs_cloudflare = {
+    for k, v in local.group_validation_pairs : k => v
+    if v.provider.cloudflare != null
+  }
+}
+
+# 3a.
ROUTE53_RAVION validation — Ravion's Route53 inline write. +resource "ravion_dns_records" "group_validation_ravion" { + for_each = local.group_validation_pairs_route53_ravion + + managed_domain_id = ravion_domain.group[each.value.domain_key].id + records = [{ + name = each.value.opt.resource_record_name + type = each.value.opt.resource_record_type + value = each.value.opt.resource_record_value + ttl = 60 + }] +} + +# 3b. ROUTE53 (customer) validation — customer's AWS write + Ravion metadata. +resource "aws_route53_record" "group_validation_r53" { + for_each = local.group_validation_pairs_route53 + + zone_id = each.value.provider.route53.hosted_zone_id + name = each.value.opt.resource_record_name + type = each.value.opt.resource_record_type + records = [each.value.opt.resource_record_value] + ttl = 60 +} + +resource "ravion_dns_records" "group_validation_metadata_r53" { + for_each = local.group_validation_pairs_route53 + + managed_domain_id = ravion_domain.group[each.value.domain_key].id + records = [{ + name = each.value.opt.resource_record_name + type = each.value.opt.resource_record_type + value = each.value.opt.resource_record_value + ttl = 60 + }] + depends_on = [aws_route53_record.group_validation_r53] +} + +# 3c. CLOUDFLARE validation — customer's CF write + Ravion metadata. 
+resource "cloudflare_dns_record" "group_validation_cf" { + for_each = local.group_validation_pairs_cloudflare + + zone_id = each.value.provider.cloudflare.zone_id + name = trimsuffix(each.value.opt.resource_record_name, ".") + type = each.value.opt.resource_record_type + content = trimsuffix(each.value.opt.resource_record_value, ".") + ttl = 60 + proxied = false +} + +resource "ravion_dns_records" "group_validation_metadata_cf" { + for_each = local.group_validation_pairs_cloudflare + + managed_domain_id = ravion_domain.group[each.value.domain_key].id + records = [{ + name = each.value.opt.resource_record_name + type = each.value.opt.resource_record_type + value = each.value.opt.resource_record_value + ttl = 60 + }] + depends_on = [cloudflare_dns_record.group_validation_cf] +} + +# 4. Cert validation — collects validation FQDNs from whichever writer +# handled each domain in the group, waits for ACM. +resource "aws_acm_certificate_validation" "group" { + for_each = { for g in var.ravion_certificate_groups : g.name => g } + + certificate_arn = aws_acm_certificate.group[each.key].arn + validation_record_fqdns = concat( + [ + for k, v in local.group_validation_pairs_route53_ravion : ravion_dns_records.group_validation_ravion[k].fqdns[0] + if v.group_name == each.key + ], + [ + for k, v in local.group_validation_pairs_route53 : ravion_dns_records.group_validation_metadata_r53[k].fqdns[0] + if v.group_name == each.key + ], + [ + for k, v in local.group_validation_pairs_cloudflare : ravion_dns_records.group_validation_metadata_cf[k].fqdns[0] + if v.group_name == each.key + ], + ) +} + +# 5. Register cert metadata at Ravion (one per group). 
+resource "ravion_managed_certificate" "group" { + for_each = { for g in var.ravion_certificate_groups : g.name => g } + + cert_arn = aws_acm_certificate_validation.group[each.key].certificate_arn + status = "ISSUED" + scope = "SERVICE" + managed_domain_ids = [ + for d in each.value.domains : + ravion_domain.group["${each.key}/${d}"].managed_domain_id + ] + issued_at = aws_acm_certificate.group[each.key].not_before + expires_at = aws_acm_certificate.group[each.key].not_after +} + +# 6. Attach each group's cert to the cluster's HTTPS listener as an +# SNI cert. The cluster already owns the default cert (wildcard), +# so these are additive — SNI handshake picks the most specific +# match. +resource "aws_lb_listener_certificate" "group" { + for_each = local.ravion_has_listener ? { for g in var.ravion_certificate_groups : g.name => g } : {} + + listener_arn = var.ravion_cluster_https_listener_arn + certificate_arn = aws_acm_certificate_validation.group[each.key].certificate_arn +} + +# 7a. Per-domain ROUTE53_RAVION routing record. +resource "ravion_dns_records" "group_routing_ravion" { + for_each = { + for k, v in local.group_domain_pairs : k => v + if local.group_providers[v.group_name].route53_ravion != null + } + + managed_domain_id = ravion_domain.group[each.key].id + records = [{ + name = ravion_domain.group[each.key].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] +} + +# 7b. Per-domain ROUTE53 (customer) routing record. 
+resource "aws_route53_record" "group_routing_r53" { + for_each = { + for k, v in local.group_domain_pairs : k => v + if local.group_providers[v.group_name].route53 != null + } + + zone_id = local.group_providers[each.value.group_name].route53.hosted_zone_id + name = ravion_domain.group[each.key].fqdn + type = "A" + + alias { + name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + evaluate_target_health = true + } +} + +resource "ravion_dns_records" "group_routing_metadata_r53" { + for_each = { + for k, v in local.group_domain_pairs : k => v + if local.group_providers[v.group_name].route53 != null + } + + managed_domain_id = ravion_domain.group[each.key].id + records = [{ + name = ravion_domain.group[each.key].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] + depends_on = [aws_route53_record.group_routing_r53] +} + +# 7c. Per-domain CLOUDFLARE routing record. +resource "cloudflare_dns_record" "group_routing_cf" { + for_each = { + for k, v in local.group_domain_pairs : k => v + if local.group_providers[v.group_name].cloudflare != null + } + + zone_id = local.group_providers[each.value.group_name].cloudflare.zone_id + name = ravion_domain.group[each.key].fqdn + type = "CNAME" + content = var.ravion_cluster_alb_dns_name + ttl = 60 + proxied = false +} + +resource "ravion_dns_records" "group_routing_metadata_cf" { + for_each = { + for k, v in local.group_domain_pairs : k => v + if local.group_providers[v.group_name].cloudflare != null + } + + managed_domain_id = ravion_domain.group[each.key].id + records = [{ + name = ravion_domain.group[each.key].fqdn + type = "CNAME" + value = var.ravion_cluster_alb_dns_name + ttl = 60 + }] + depends_on = [cloudflare_dns_record.group_routing_cf] +} + +# 8. Per-domain host-header listener rules pointing at this service's +# target group. 
Variant-agnostic — once the cert is attached + the +# DNS routing record points at the ALB, the listener rule does the +# HTTP-level routing. +resource "aws_lb_listener_rule" "group" { + for_each = local.ravion_has_listener ? local.group_domain_pairs : {} + + listener_arn = var.ravion_cluster_https_listener_arn + priority = local.group_priority_for_pair[each.key] + + condition { + host_header { + values = [ravion_domain.group[each.key].fqdn] + } + } + + action { + type = "forward" + target_group_arn = aws_lb_target_group.this[0].arn + } + + lifecycle { + ignore_changes = [action] + } + + tags = merge(var.tags, { + "ravion:cert_group" = each.value.group_name + }) +} diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index a7c9879..d8e37d9 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -670,3 +670,49 @@ variable "ravion_listener_rule_priority_base" { description = "Base for the per-domain listener-rule priority. 0 → derived deterministically from (var.name, domain-slug) via sha256 so two services in the same cluster don't collide. Non-zero values are used as-is for the first domain; subsequent domains increment by 1." default = 0 } + +variable "ravion_certificate_groups" { + type = list(object({ + # Group name — used as a stable identifier in TF state keys + # (ravion_domain.group[\"/\"]) and as a tag on AWS + # resources. Pick something short + url-safe per group; never + # reuse across groups in the same service. + name = string + + # Per-group DnsProvider override. Either id or given_id wins + # (id first if both set). Leave both null to inherit from the + # service's top-level ravion_dns_provider_* vars. + dns_provider_id = optional(string) + dns_provider_given_id = optional(string) + + # Domain slugs to cover with this cert. Each becomes a child + # ravion_domain allocation under the group's provider apex (FQDN + # derived as -.). 
Capped at 10 to match ACM's + # default cert SAN limit — increase only after raising the AWS + # account quota. + domains = list(string) + })) + description = "Per-service certificate groups. Each group issues ONE ACM cert covering up to 10 domains, validated via the group's DnsProvider variant, and attached as an SNI cert to the cluster's HTTPS listener. Use when service FQDNs need their own cert (multi-zone setups, non-wildcard apexes) instead of inheriting the cluster's wildcard. Groups are additive — ungrouped ravion_domains keep inheriting the cluster wildcard." + default = [] + + validation { + condition = alltrue([for g in var.ravion_certificate_groups : length(g.domains) <= 10]) + error_message = "Each certificate group can contain at most 10 domains (ACM default SAN limit)." + } + validation { + condition = alltrue([for g in var.ravion_certificate_groups : length(g.domains) >= 1]) + error_message = "Each certificate group must contain at least 1 domain." + } + validation { + condition = length(distinct([for g in var.ravion_certificate_groups : g.name])) == length(var.ravion_certificate_groups) + error_message = "Certificate group names must be unique within a service." + } + validation { + condition = alltrue([ + for g in var.ravion_certificate_groups : alltrue([ + for d in g.domains : can(regex("^[a-z0-9]([a-z0-9-]*[a-z0-9])?$", d)) && length(d) <= 63 + ]) + ]) + error_message = "Each domain slug must match ^[a-z0-9]([a-z0-9-]*[a-z0-9])?$ and be <= 63 chars." 
+ } +} From b61ee3c2d5c1960f8af053bcaabc7d7d68c98cac Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 02:49:01 +0530 Subject: [PATCH 09/37] refactor: clean up comments and improve readability in provider and domain files --- compute/autoscaling/variables.tf | 244 +++++++++++++------------- compute/ecs_cluster/provider.tf | 5 +- compute/ecs_cluster/ravion_domains.tf | 7 +- compute/ecs_cluster/versions.tf | 5 +- compute/ecs_service/provider.tf | 2 - 5 files changed, 127 insertions(+), 136 deletions(-) diff --git a/compute/autoscaling/variables.tf b/compute/autoscaling/variables.tf index edc1337..169a504 100644 --- a/compute/autoscaling/variables.tf +++ b/compute/autoscaling/variables.tf @@ -595,42 +595,42 @@ variable "mixed_instances_policy" { validation { condition = try( -var.mixed_instances_policy == null || ( - var.mixed_instances_policy.instances_distribution == null || - contains(["prioritized", "lowest-price"], coalesce(var.mixed_instances_policy.instances_distribution.on_demand_allocation_strategy, "prioritized")) - ) + var.mixed_instances_policy == null || ( + var.mixed_instances_policy.instances_distribution == null || + contains(["prioritized", "lowest-price"], coalesce(var.mixed_instances_policy.instances_distribution.on_demand_allocation_strategy, "prioritized")) + ) , true) error_message = "The on_demand_allocation_strategy must be 'prioritized' or 'lowest-price'." 
} validation { condition = try( -var.mixed_instances_policy == null || ( - var.mixed_instances_policy.instances_distribution == null || - contains(["capacity-optimized", "capacity-optimized-prioritized", "lowest-price", "price-capacity-optimized"], coalesce(var.mixed_instances_policy.instances_distribution.spot_allocation_strategy, "capacity-optimized")) - ) + var.mixed_instances_policy == null || ( + var.mixed_instances_policy.instances_distribution == null || + contains(["capacity-optimized", "capacity-optimized-prioritized", "lowest-price", "price-capacity-optimized"], coalesce(var.mixed_instances_policy.instances_distribution.spot_allocation_strategy, "capacity-optimized")) + ) , true) error_message = "The spot_allocation_strategy must be 'capacity-optimized', 'capacity-optimized-prioritized', 'lowest-price', or 'price-capacity-optimized'." } validation { condition = try( -var.mixed_instances_policy == null || ( - var.mixed_instances_policy.instances_distribution == null || ( - coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) >= 0 && - coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) <= 100 + var.mixed_instances_policy == null || ( + var.mixed_instances_policy.instances_distribution == null || ( + coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) >= 0 && + coalesce(var.mixed_instances_policy.instances_distribution.on_demand_percentage_above_base_capacity, 100) <= 100 + ) ) - ) , true) error_message = "The on_demand_percentage_above_base_capacity must be between 0 and 100." 
} validation { condition = try( -var.mixed_instances_policy == null || ( - var.mixed_instances_policy.instances_distribution == null || - coalesce(var.mixed_instances_policy.instances_distribution.on_demand_base_capacity, 0) >= 0 - ) + var.mixed_instances_policy == null || ( + var.mixed_instances_policy.instances_distribution == null || + coalesce(var.mixed_instances_policy.instances_distribution.on_demand_base_capacity, 0) >= 0 + ) , true) error_message = "The on_demand_base_capacity must be 0 or greater." } @@ -687,57 +687,57 @@ variable "instance_refresh" { validation { condition = try( -var.instance_refresh == null || ( - contains(["Rolling"], coalesce(var.instance_refresh.strategy, "Rolling")) - ) + var.instance_refresh == null || ( + contains(["Rolling"], coalesce(var.instance_refresh.strategy, "Rolling")) + ) , true) error_message = "The instance_refresh strategy must be 'Rolling'." } validation { condition = try( -var.instance_refresh == null || var.instance_refresh.preferences == null || ( - coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) >= 0 && - coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) <= 100 - ) + var.instance_refresh == null || var.instance_refresh.preferences == null || ( + coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) >= 0 && + coalesce(var.instance_refresh.preferences.min_healthy_percentage, 90) <= 100 + ) , true) error_message = "The min_healthy_percentage must be between 0 and 100." 
} validation { condition = try( -var.instance_refresh == null || var.instance_refresh.preferences == null || ( - coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) >= 100 && - coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) <= 200 - ) + var.instance_refresh == null || var.instance_refresh.preferences == null || ( + coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) >= 100 && + coalesce(var.instance_refresh.preferences.max_healthy_percentage, 100) <= 200 + ) , true) error_message = "The max_healthy_percentage must be between 100 and 200." } validation { condition = try( -var.instance_refresh == null || var.instance_refresh.preferences == null || ( - contains(["Refresh", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.scale_in_protected_instances, "Ignore")) - ) + var.instance_refresh == null || var.instance_refresh.preferences == null || ( + contains(["Refresh", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.scale_in_protected_instances, "Ignore")) + ) , true) error_message = "The scale_in_protected_instances must be 'Refresh', 'Ignore', or 'Wait'." } validation { condition = try( -var.instance_refresh == null || var.instance_refresh.preferences == null || ( - contains(["Terminate", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.standby_instances, "Ignore")) - ) + var.instance_refresh == null || var.instance_refresh.preferences == null || ( + contains(["Terminate", "Ignore", "Wait"], coalesce(var.instance_refresh.preferences.standby_instances, "Ignore")) + ) , true) error_message = "The standby_instances must be 'Terminate', 'Ignore', or 'Wait'." 
} validation { condition = try( -var.instance_refresh == null || var.instance_refresh.preferences == null || ( - var.instance_refresh.preferences.checkpoint_percentages == null || - alltrue([for p in var.instance_refresh.preferences.checkpoint_percentages : p >= 0 && p <= 100]) - ) + var.instance_refresh == null || var.instance_refresh.preferences == null || ( + var.instance_refresh.preferences.checkpoint_percentages == null || + alltrue([for p in var.instance_refresh.preferences.checkpoint_percentages : p >= 0 && p <= 100]) + ) , true) error_message = "All checkpoint_percentages must be between 0 and 100." } @@ -773,28 +773,28 @@ variable "warm_pool" { validation { condition = try( -var.warm_pool == null || ( - contains(["Stopped", "Running", "Hibernated"], coalesce(var.warm_pool.pool_state, "Stopped")) - ) + var.warm_pool == null || ( + contains(["Stopped", "Running", "Hibernated"], coalesce(var.warm_pool.pool_state, "Stopped")) + ) , true) error_message = "The pool_state must be 'Stopped', 'Running', or 'Hibernated'." } validation { condition = try( -var.warm_pool == null || ( - coalesce(var.warm_pool.min_size, 0) >= 0 - ) + var.warm_pool == null || ( + coalesce(var.warm_pool.min_size, 0) >= 0 + ) , true) error_message = "The warm_pool min_size must be 0 or greater." } validation { condition = try( -var.warm_pool == null || ( - var.warm_pool.max_group_prepared_capacity == null || - var.warm_pool.max_group_prepared_capacity >= 0 - ) + var.warm_pool == null || ( + var.warm_pool.max_group_prepared_capacity == null || + var.warm_pool.max_group_prepared_capacity >= 0 + ) , true) error_message = "The max_group_prepared_capacity must be null or 0 or greater." 
} @@ -861,20 +861,20 @@ variable "lifecycle_hooks" { validation { condition = try( -alltrue([ - for hook in var.lifecycle_hooks : - hook.notification_target_arn == null || can(regex("^arn:aws:(sns|sqs):", hook.notification_target_arn)) - ]) + alltrue([ + for hook in var.lifecycle_hooks : + hook.notification_target_arn == null || can(regex("^arn:aws:(sns|sqs):", hook.notification_target_arn)) + ]) , true) error_message = "Each notification_target_arn must be null or a valid SNS topic or SQS queue ARN." } validation { condition = try( -alltrue([ - for hook in var.lifecycle_hooks : - hook.role_arn == null || can(regex("^arn:aws:iam::", hook.role_arn)) - ]) + alltrue([ + for hook in var.lifecycle_hooks : + hook.role_arn == null || can(regex("^arn:aws:iam::", hook.role_arn)) + ]) , true) error_message = "Each role_arn must be null or a valid IAM role ARN." } @@ -1118,81 +1118,81 @@ variable "scaling_policies" { validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.adjustment_type == null || contains(["ChangeInCapacity", "ExactCapacity", "PercentChangeInCapacity"], policy.adjustment_type) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.adjustment_type == null || contains(["ChangeInCapacity", "ExactCapacity", "PercentChangeInCapacity"], policy.adjustment_type) + ]) , true) error_message = "Each adjustment_type must be 'ChangeInCapacity', 'ExactCapacity', or 'PercentChangeInCapacity'." } validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.metric_aggregation_type == null || contains(["Minimum", "Maximum", "Average"], policy.metric_aggregation_type) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.metric_aggregation_type == null || contains(["Minimum", "Maximum", "Average"], policy.metric_aggregation_type) + ]) , true) error_message = "Each metric_aggregation_type must be 'Minimum', 'Maximum', or 'Average'." 
} validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.target_tracking_configuration == null || - policy.target_tracking_configuration.predefined_metric_specification == null || - contains( - ["ASGAverageCPUUtilization", "ASGAverageNetworkIn", "ASGAverageNetworkOut", "ALBRequestCountPerTarget"], - policy.target_tracking_configuration.predefined_metric_specification.predefined_metric_type - ) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.target_tracking_configuration == null || + policy.target_tracking_configuration.predefined_metric_specification == null || + contains( + ["ASGAverageCPUUtilization", "ASGAverageNetworkIn", "ASGAverageNetworkOut", "ALBRequestCountPerTarget"], + policy.target_tracking_configuration.predefined_metric_specification.predefined_metric_type + ) + ]) , true) error_message = "Each predefined_metric_type for target tracking must be 'ASGAverageCPUUtilization', 'ASGAverageNetworkIn', 'ASGAverageNetworkOut', or 'ALBRequestCountPerTarget'." } validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.predictive_scaling_configuration == null || - contains(["ForecastAndScale", "ForecastOnly"], coalesce(policy.predictive_scaling_configuration.mode, "ForecastOnly")) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.predictive_scaling_configuration == null || + contains(["ForecastAndScale", "ForecastOnly"], coalesce(policy.predictive_scaling_configuration.mode, "ForecastOnly")) + ]) , true) error_message = "Each predictive scaling mode must be 'ForecastAndScale' or 'ForecastOnly'." 
} validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.predictive_scaling_configuration == null || - contains(["IncreaseMaxCapacity", "HonorMaxCapacity"], coalesce(policy.predictive_scaling_configuration.max_capacity_breach_behavior, "HonorMaxCapacity")) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.predictive_scaling_configuration == null || + contains(["IncreaseMaxCapacity", "HonorMaxCapacity"], coalesce(policy.predictive_scaling_configuration.max_capacity_breach_behavior, "HonorMaxCapacity")) + ]) , true) error_message = "Each max_capacity_breach_behavior must be 'IncreaseMaxCapacity' or 'HonorMaxCapacity'." } validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.predictive_scaling_configuration == null || - policy.predictive_scaling_configuration.scheduling_buffer_time == null || - (policy.predictive_scaling_configuration.scheduling_buffer_time >= 0 && policy.predictive_scaling_configuration.scheduling_buffer_time <= 3600) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.predictive_scaling_configuration == null || + policy.predictive_scaling_configuration.scheduling_buffer_time == null || + (policy.predictive_scaling_configuration.scheduling_buffer_time >= 0 && policy.predictive_scaling_configuration.scheduling_buffer_time <= 3600) + ]) , true) error_message = "Each scheduling_buffer_time must be between 0 and 3600 seconds." 
} validation { condition = try( -alltrue([ - for policy in var.scaling_policies : - policy.predictive_scaling_configuration == null || - policy.predictive_scaling_configuration.max_capacity_buffer == null || - (policy.predictive_scaling_configuration.max_capacity_buffer >= 0 && policy.predictive_scaling_configuration.max_capacity_buffer <= 100) - ]) + alltrue([ + for policy in var.scaling_policies : + policy.predictive_scaling_configuration == null || + policy.predictive_scaling_configuration.max_capacity_buffer == null || + (policy.predictive_scaling_configuration.max_capacity_buffer >= 0 && policy.predictive_scaling_configuration.max_capacity_buffer <= 100) + ]) , true) error_message = "Each max_capacity_buffer must be between 0 and 100." } @@ -1226,27 +1226,27 @@ variable "notifications" { validation { condition = try( -var.notifications == null || ( - can(regex("^arn:aws:sns:", var.notifications.topic_arn)) - ) + var.notifications == null || ( + can(regex("^arn:aws:sns:", var.notifications.topic_arn)) + ) , true) error_message = "The topic_arn must be a valid SNS topic ARN starting with 'arn:aws:sns:'." 
} validation { condition = try( -var.notifications == null || ( - alltrue([ - for notification in coalesce(var.notifications.notifications, []) : - contains([ - "autoscaling:EC2_INSTANCE_LAUNCH", - "autoscaling:EC2_INSTANCE_LAUNCH_ERROR", - "autoscaling:EC2_INSTANCE_TERMINATE", - "autoscaling:EC2_INSTANCE_TERMINATE_ERROR", - "autoscaling:TEST_NOTIFICATION" - ], notification) - ]) - ) + var.notifications == null || ( + alltrue([ + for notification in coalesce(var.notifications.notifications, []) : + contains([ + "autoscaling:EC2_INSTANCE_LAUNCH", + "autoscaling:EC2_INSTANCE_LAUNCH_ERROR", + "autoscaling:EC2_INSTANCE_TERMINATE", + "autoscaling:EC2_INSTANCE_TERMINATE_ERROR", + "autoscaling:TEST_NOTIFICATION" + ], notification) + ]) + ) , true) error_message = "Each notification must be one of: 'autoscaling:EC2_INSTANCE_LAUNCH', 'autoscaling:EC2_INSTANCE_LAUNCH_ERROR', 'autoscaling:EC2_INSTANCE_TERMINATE', 'autoscaling:EC2_INSTANCE_TERMINATE_ERROR', 'autoscaling:TEST_NOTIFICATION'." } @@ -1343,30 +1343,30 @@ variable "schedules" { validation { condition = try( -alltrue([ - for schedule in var.schedules : - schedule.min_size == null || schedule.min_size >= 0 - ]) + alltrue([ + for schedule in var.schedules : + schedule.min_size == null || schedule.min_size >= 0 + ]) , true) error_message = "Each schedule min_size must be null or 0 or greater." } validation { condition = try( -alltrue([ - for schedule in var.schedules : - schedule.max_size == null || schedule.max_size >= 1 - ]) + alltrue([ + for schedule in var.schedules : + schedule.max_size == null || schedule.max_size >= 1 + ]) , true) error_message = "Each schedule max_size must be null or at least 1." 
} validation { condition = try( -alltrue([ - for schedule in var.schedules : - schedule.desired_capacity == null || schedule.desired_capacity >= 0 - ]) + alltrue([ + for schedule in var.schedules : + schedule.desired_capacity == null || schedule.desired_capacity >= 0 + ]) , true) error_message = "Each schedule desired_capacity must be null or 0 or greater." } diff --git a/compute/ecs_cluster/provider.tf b/compute/ecs_cluster/provider.tf index 781fe53..e13fd69 100644 --- a/compute/ecs_cluster/provider.tf +++ b/compute/ecs_cluster/provider.tf @@ -5,10 +5,7 @@ provider "aws" { # Cloudflare provider — used only when the registered DnsProvider is # CLOUDFLARE (count gating on `data.ravion_dns_provider.this[0].cloudflare` # in ravion_domains.tf decides whether any `cloudflare_record` resources -# are actually planned). The api_token attribute is the plaintext -# token Ravion's data source dereferences from WorkOS Vault -# server-side — the token never lands in HCL or TF state in -# unencrypted form because the schema marks it Sensitive. +# are actually planned). # # When the DnsProvider is anything other than CLOUDFLARE the data # source's cloudflare attribute is null; the provider config still diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 5d8186f..082a58b 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -15,9 +15,8 @@ # persists metadata via `ravion_dns_records` after- # the-fact (depends_on). # CLOUDFLARE → Customer's Cloudflare zone. `cloudflare_dns_record` -# writes the record using the api_token sourced -# from WorkOS Vault via the data source; Ravion -# metadata after-the-fact. +# writes the record using the api_token resolved +# by the data source; Ravion metadata after-the-fact. # EXTERNAL → Skipped — module assumes BYO cert in this mode. # # All AWS / Cloudflare resources live in the customer's accounts, @@ -100,7 +99,7 @@ resource "ravion_dns_records" "cluster_validation_metadata_r53" { # ---- 3c.
CLOUDFLARE validation records ------------------------------------- # Customer's Cloudflare zone. The cloudflare provider's api_token is -# resolved from WorkOS Vault by data.ravion_dns_provider — see +# resolved by data.ravion_dns_provider — see # provider.tf for the provider block. resource "cloudflare_dns_record" "cluster_validation_cf" { for_each = local.is_cloudflare ? { diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index 627d35e..c91c663 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -24,10 +24,7 @@ terraform { # is CLOUDFLARE — the customer's TF writes acme validation + # apex routing records via `cloudflare_record`, and Ravion # records them after-the-fact via `ravion_dns_records` for the - # UI. Provider config below reads `data.ravion_dns_provider`'s - # cloudflare attribute group; api_token is sourced from - # WorkOS Vault server-side and returned to the runner as a - # sensitive computed attribute. + # UI. cloudflare = { source = "cloudflare/cloudflare" version = ">= 4.0" diff --git a/compute/ecs_service/provider.tf b/compute/ecs_service/provider.tf index d5ee066..184c1cf 100644 --- a/compute/ecs_service/provider.tf +++ b/compute/ecs_service/provider.tf @@ -5,8 +5,6 @@ provider "aws" { # Cloudflare provider — used only when the parent cluster's # DnsProvider is CLOUDFLARE (count gating on # `data.ravion_dns_provider.this[0].cloudflare` in ravion_domains.tf). -# Same api_token resolution path as the cluster module: WorkOS Vault -# deref via Ravion's data source. 
provider "cloudflare" { api_token = try(data.ravion_dns_provider.this[0].cloudflare.api_token, null) } From 266f009ed86b74791747ce69067eb228f4c9f8ca Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 02:59:24 +0530 Subject: [PATCH 10/37] ecs_cluster: add ravion_dns_provider_given_id form field --- compute/ecs_cluster/module.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compute/ecs_cluster/module.yml b/compute/ecs_cluster/module.yml index 88e946d..f1baed0 100644 --- a/compute/ecs_cluster/module.yml +++ b/compute/ecs_cluster/module.yml @@ -332,6 +332,14 @@ input: enable_public_alb: true public_alb_enable_https: true values: "$values:ravion/dns-providers" + - ravion_dns_provider_given_id: + type: string + label: "DNS Provider (by stable id)" + description: "Per-org stable identifier — same dual-lookup as ravion_dns_provider_id. Use this when module definitions are shared across orgs that reference the same provider by name." + show_when: + enable_public_alb: true + public_alb_enable_https: true + ravion_dns_provider_id: "" - ravion_cluster_slug: type: string label: "FQDN Slug" From fcc3522458c7b74f5954f7a502c6d9102d163ea1 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 03:55:20 +0530 Subject: [PATCH 11/37] feat(ecs): cluster use_ravion_subdomain + service ravion_auto_subdomain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-mode zero-config URLs: Cluster: var.use_ravion_subdomain (default true). Looks up platform DnsProvider via stable given_id="ravion-platform-apex"; posts fqdn_override="."; wildcard cert covers *... var.module_instance_id is the runner-injected identifier (null in standalone use → slug mode). Service: var.ravion_auto_subdomain (default true) + service_given_id. Allocates one ravion_domain with slug=service_given_id + parent_domain_allocation_id=cluster's allocation. Server derives -.. 
Per-variant routing record + listener rule for the auto allocation, same dispatch as the per-domain blocks (route53_ravion / route53 / cloudflare). Module form (cluster): use_ravion_subdomain boolean at the top of Managed Domains. ravion_dns_provider_id moved to show_when use_ravion_subdomain=false + sourced from $values:ravion/dns-providers-customer (no platform row in the dropdown — auto-mode handles that). ravion_dns_provider_given_id stays as the secondary input under the same false-gate. --- compute/ecs_cluster/.terraform.lock.hcl | 38 +++---- compute/ecs_cluster/data.tf | 32 +++--- compute/ecs_cluster/locals.tf | 43 ++++++-- compute/ecs_cluster/module.yml | 18 +++- compute/ecs_cluster/ravion_domains.tf | 9 +- compute/ecs_cluster/variables.tf | 16 ++- compute/ecs_service/ravion_domains.tf | 125 ++++++++++++++++++++++++ compute/ecs_service/variables.tf | 12 +++ 8 files changed, 245 insertions(+), 48 deletions(-) diff --git a/compute/ecs_cluster/.terraform.lock.hcl b/compute/ecs_cluster/.terraform.lock.hcl index ef959ee..6d20816 100644 --- a/compute/ecs_cluster/.terraform.lock.hcl +++ b/compute/ecs_cluster/.terraform.lock.hcl @@ -2,10 +2,10 @@ # Manual edits may be lost in future updates.
provider "ravion.com/ravion/domains" { - version = "2.0.0" + version = "2.1.0" constraints = ">= 2.0.0" hashes = [ - "h1:OLB6BBvlsZr1SwIdNtZ07ihAJFmjd/qVX8UzJ8xuZcw=", + "h1:Q13F9had825xi67cqiX8nNL1Zrm3BU51/n/dC5zRf3Q=", ] } @@ -27,24 +27,24 @@ provider "registry.opentofu.org/cloudflare/cloudflare" { } provider "registry.opentofu.org/hashicorp/aws" { - version = "6.39.0" + version = "6.46.0" constraints = ">= 6.0.0" hashes = [ - "h1:c9SG8ZdYgzqpxORpTqeLFeXW4qQQ8GMGCcUkU+FAfQM=", - "zh:00a6c0d8b5b86833087e367b632e9ab73fb8db9c43569020ebd0489dc2c919ce", - "zh:05f2b56211f4c8a0b66a093d025187cbc7be086dedef62306f5a28290598ebdc", - "zh:24d97a31d5ab814c33ed32a5b7674f1a15544b2367a95bddd00cfdd8d6b82740", - "zh:258194e24ac07ee194d580ca25a25fa7bc48fa40fed4fd58352b0a64da0da4c9", - "zh:315337e5f0ccafeadf490f117151b52c6d66244bf652f4fee975eddda662af3b", - "zh:38573dd56cca8c0ffe33396cf17cc8bd13de1d27d3c4da4177e485d174f1eaf0", - "zh:4baa806c5eb8faae95cea3f1dfafb153b5e3e96c5b30a2102072da4f032d2d9b", - "zh:4f258106baca7e00a6904b2353579d283e4400a75cd0353a25e057921e8a8d96", - "zh:62e5d4628d03883a6c2a6e3c297eb54df9b5935e9e3a655dbb1c6c5ddaf7ea33", - "zh:8af5fae01c1cef65d149fa6fe47e94cf46ffa97d29e8f2dfe41aeae01da590ea", - "zh:a8240b40f7be408ac24897597a85dc4fe56f390224b11ecad2c1327e686fca58", - "zh:c549eee2a0cf0e2c4a676614d990121b685beab0047b1073407ee26247c4be13", - "zh:cfed074ba8948c75445c74c69722cb17c960024b1917b4f26905aa9c9ac4e667", - "zh:d6f4f4fa01e33d0d546705e2776f38d0b4f2847827b3f07ecde87cc02ef3d23e", - "zh:e7239b349c3149e4670750481b687c5c828908fd09f2196d7af1ac1b4d83e80b", + "h1:VWSIs/K9tDs2X4Ej6fXHfiqlFubplb9ueRmTYmuqYh4=", + "zh:0a1dad9bd82743d63c1e65551e68948d33638dfaebb7f44bd89ec948e33ff975", + "zh:166dde870afc7d1fca056d99eb8af8a0729f54e84467e6e2fa7f0a3bea5982cf", + "zh:41bceb5f5f3aa832e07a4f077371f1309de1cf42621117d144227022d5ebc95c", + "zh:42fae38f09d1c23d8b9bdd12306d7740196837c150891f21517ada314ec40b35", + "zh:8d01f9b87083201fdfef4139068dc6540f9a0e3ac1e394b5c53195c2ac760b59", + 
"zh:9964ed013c0eb3cd8ff9b501a88e6a823b42655c4354a0f37e6f764bc51ab1fe", + "zh:a153cbb1cb57288955ba3db2e25368931afda529bab1afd5eda361fc368378e3", + "zh:a3bd14a333791ed26e60921d1c348ab5b9b5aae8fcfc590e40e9d741cdd9a9b4", + "zh:a4d32af825ba8f37d3c0c979a241a132a31332684b19060b0523b92c4dc5337f", + "zh:bc5ff7cf49be2c648d16c96449f0b02fda3e0d3af5e6820d625246a249ff31ee", + "zh:beacec832343dac5d4624d33eae80a44d58ef121c065b56ec220d6bbce2b5011", + "zh:c346a67110f3e17a65d4a46316f062b4af6b88d8fc3d5fb4092a6f0669719602", + "zh:cd0b4f44186a0eb2bd3cc59e4a55b5a363c9ac3303b23d0128171ff851f36336", + "zh:d524bff9f4b567ad19c3d3040ead81aa4108030238155d87a1bb2c797a7870e7", + "zh:ef23733dd0c7d40a4f3ccc761c4433a265324fa393aa4ae6015942b31a13d30e", ] } diff --git a/compute/ecs_cluster/data.tf b/compute/ecs_cluster/data.tf index d201bf8..96772f6 100644 --- a/compute/ecs_cluster/data.tf +++ b/compute/ecs_cluster/data.tf @@ -15,18 +15,26 @@ data "aws_region" "current" {} # Get current AWS account ID data "aws_caller_identity" "current" {} -# Resolve the registered Ravion DnsProvider that the cluster's -# wildcard FQDN + cert hang off. Accepts an opaque id -# (`ravion_dns_provider_id = "dnsprov_..."`) OR a per-org stable -# `given_id` — the api-go handler does a dual lookup. Exactly one of -# the per-variant attribute groups (`route53_ravion`, `route53`, -# `cloudflare`, `external`) is non-null on the returned row; the count -# gating in ravion_domains.tf dispatches on those. +# Resolve the cluster's DnsProvider: # -# The count = 0 branch (no provider configured) is the BYO-cert path — -# `var.public_alb_certificate_arns` is consumed directly. +# - Auto-mode (use_ravion_subdomain = true): look up the +# platform-managed apex by its stable givenId. The api-go boot +# seeds this row with `givenId = "ravion-platform-apex"` so any +# Ravion deployment can reference it without knowing the opaque +# dnsprov_* id. 
+# +# - Customer mode (use_ravion_subdomain = false + caller-supplied +# ravion_dns_provider_id / given_id): standard dual-lookup, same +# as the V2 service modules. +# +# count = 0 path is the BYO-cert escape hatch (no provider configured; +# var.public_alb_certificate_arns is consumed directly). data "ravion_dns_provider" "this" { - count = local.dns_provider_lookup_key == "" ? 0 : 1 - id = var.ravion_dns_provider_id != null && var.ravion_dns_provider_id != "" ? var.ravion_dns_provider_id : null - given_id = var.ravion_dns_provider_given_id != null && var.ravion_dns_provider_given_id != "" ? var.ravion_dns_provider_given_id : null + count = local.enable_dns_provider_lookup ? 1 : 0 + id = local.auto_provider_id + given_id = ( + local.auto_provider_id == null + ? local.auto_provider_given_id + : null + ) } diff --git a/compute/ecs_cluster/locals.tf b/compute/ecs_cluster/locals.tf index 0026b3a..48fa84d 100644 --- a/compute/ecs_cluster/locals.tf +++ b/compute/ecs_cluster/locals.tf @@ -1,31 +1,56 @@ locals { region = coalesce(var.region, data.aws_region.current.id) - # Either input form (id or given_id) drives the lookup. The data - # source's count is gated on this string being non-empty. - dns_provider_lookup_key = coalesce( - var.ravion_dns_provider_id, - var.ravion_dns_provider_given_id, - "", + # Auto-mode resolution. When use_ravion_subdomain is on, the data + # source ignores customer-supplied provider inputs and looks up the + # platform apex by its stable givenId (seeded at api-go boot). When + # off, the customer's provider id/given_id is used. + auto_provider_id = ( + var.use_ravion_subdomain + ? null + : (var.ravion_dns_provider_id != null && var.ravion_dns_provider_id != "" ? var.ravion_dns_provider_id : null) + ) + auto_provider_given_id = ( + var.use_ravion_subdomain + ? "ravion-platform-apex" + : (var.ravion_dns_provider_given_id != null && var.ravion_dns_provider_given_id != "" ? 
var.ravion_dns_provider_given_id : null) + ) + + # The data source's count is gated on having SOMETHING to look up. + # Auto-mode always has the platform given_id; customer mode requires + # one of the two caller inputs. + enable_dns_provider_lookup = ( + var.use_ravion_subdomain || + local.auto_provider_id != null || + local.auto_provider_given_id != null ) # The resolved DnsProvider row (only present when the data source's # count == 1). Per-variant attribute groups (`route53_ravion`, # `route53`, `cloudflare`, `external`) are how the ravion_domains.tf # blocks dispatch — exactly one is non-null per row. - dns_provider = local.dns_provider_lookup_key != "" ? data.ravion_dns_provider.this[0] : null + dns_provider = local.enable_dns_provider_lookup ? data.ravion_dns_provider.this[0] : null # Ravion-managed domains gate. When true the cluster allocates a # wildcard FQDN + issues a wildcard ACM cert in ravion_domains.tf; # service modules under this cluster inherit the wildcard via SNI. - # Implicit: setting either provider input + enabling HTTPS implies - # "use Ravion-managed cert"; nothing else picks the path. enable_ravion_domain = ( var.enable_public_alb && var.public_alb_enable_https && local.dns_provider != null ) + # Auto-mode fqdnOverride: literal `.`. The + # cluster's wildcard cert covers `*..` so + # services under it inherit via SNI. Falls back to a placeholder + # when module_instance_id is null (standalone use) — in that case + # auto-mode is effectively disabled and slug-mode kicks in. + cluster_auto_fqdn = ( + var.use_ravion_subdomain && var.module_instance_id != null && var.module_instance_id != "" + ? format("%s.%s", var.module_instance_id, local.dns_provider != null ? local.dns_provider.domain_name : "") + : null + ) + # Per-variant flags — count gating on these decides which writer # path validation + apex routing records take. 
Mutually exclusive: # exactly one is true when enable_ravion_domain is true (except diff --git a/compute/ecs_cluster/module.yml b/compute/ecs_cluster/module.yml index f1baed0..262d5e1 100644 --- a/compute/ecs_cluster/module.yml +++ b/compute/ecs_cluster/module.yml @@ -324,14 +324,23 @@ input: default: false show_when: enable_public_alb: true + - use_ravion_subdomain: + type: boolean + label: "Use a Ravion-generated subdomain" + description: "Skip DNS setup — Ravion allocates *..ravion.app and issues the wildcard cert. Turn off to bring your own zone (Cloudflare, Route53, etc.)." + default: true + show_when: + enable_public_alb: true + public_alb_enable_https: true - ravion_dns_provider_id: type: string label: "DNS Provider" - description: "Ravion DnsProvider the cluster's wildcard FQDN lives under. Register providers (Route53, Cloudflare, or other) on the DNS Providers settings page; the variant (cert source, validation path) is implicit in the provider's type. Leave empty to use BYO certificate ARNs instead." + description: "Ravion DnsProvider the cluster's wildcard FQDN lives under. Register providers (Route53, Cloudflare, or other) on the DNS Providers settings page. Only consulted when use_ravion_subdomain is off." show_when: enable_public_alb: true public_alb_enable_https: true - values: "$values:ravion/dns-providers" + use_ravion_subdomain: false + values: "$values:ravion/dns-providers-customer" - ravion_dns_provider_given_id: type: string label: "DNS Provider (by stable id)" @@ -339,15 +348,16 @@ input: show_when: enable_public_alb: true public_alb_enable_https: true + use_ravion_subdomain: false ravion_dns_provider_id: "" - ravion_cluster_slug: type: string label: "FQDN Slug" - description: "Human-readable slug used to derive the cluster's FQDN (-.). Defaults to the cluster name when empty." + description: "Human-readable slug used to derive the cluster's FQDN (-.). Defaults to the cluster name when empty. 
Ignored in auto-mode (uses the module-instance id as the literal label)." show_when: enable_public_alb: true public_alb_enable_https: true - ravion_dns_provider_id: "!=" + use_ravion_subdomain: false - public_alb_certificate_arns: type: list label: "ACM Certificate ARNs (BYO)" diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 082a58b..a6737fc 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -30,8 +30,13 @@ resource "ravion_domain" "cluster" { count = local.enable_ravion_domain ? 1 : 0 dns_provider_id = local.dns_provider.id - slug = coalesce(var.ravion_cluster_slug, var.name) - wildcard = true + # Auto-mode (use_ravion_subdomain + module_instance_id known) posts + # the literal `.` so the wildcard cert + # covers `*..`. Slug mode falls back to + # the legacy `-.` derivation. + slug = local.cluster_auto_fqdn == null ? coalesce(var.ravion_cluster_slug, var.name) : null + fqdn_override = local.cluster_auto_fqdn + wildcard = true } # ---- 2. ACM wildcard cert (skipped for EXTERNAL) --------------------------- diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index 888d94e..f86c7e1 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -646,9 +646,15 @@ variable "region" { # take. Enum strings never appear in this module's HCL. ################################################################################ +variable "use_ravion_subdomain" { + type = bool + description = "Auto-mode: allocate the cluster wildcard under Ravion's platform apex (no DNS setup required). When true, the module looks up the platform DnsProvider via given_id = \"ravion-platform-apex\" and posts a literal FQDN of `.` (so the cert covers `*..`). When false, the caller's ravion_dns_provider_id / given_id is used." 
+ default = true +} + variable "ravion_dns_provider_id" { type = string - description = "Opaque Ravion DnsProvider id (`dnsprov_*`) the cluster's wildcard allocation lives under. Provide EITHER this or ravion_dns_provider_given_id; if both are set, this wins. Leave both null to opt out of Ravion-managed certs and supply public_alb_certificate_arns directly." + description = "Opaque Ravion DnsProvider id (`dnsprov_*`) the cluster's wildcard allocation lives under. Only consulted when use_ravion_subdomain is false. Provide EITHER this or ravion_dns_provider_given_id; if both are set, this wins." default = null } @@ -660,6 +666,12 @@ variable "ravion_dns_provider_given_id" { variable "ravion_cluster_slug" { type = string - description = "Human-readable slug used to derive the cluster's FQDN (`-.`). Defaults to var.name when null." + description = "Human-readable slug used to derive the cluster's FQDN (`-.`). Defaults to var.name when null. Ignored in use_ravion_subdomain mode (auto-mode uses the literal module-instance id instead)." + default = null +} + +variable "module_instance_id" { + type = string + description = "The cluster module-instance id this module is running for. Used by use_ravion_subdomain auto-mode to construct the wildcard FQDN. Injected by the Ravion runner via the workspace name when present; safe to leave null in standalone use." default = null } diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 2284a83..2f4cb7a 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -50,6 +50,27 @@ locals { } } +# 0. Auto-mode allocation (zero-config). One URL per service when +# ravion_auto_subdomain is on AND the parent cluster wildcard is +# wired. Slug = service's given_id → server derives +# `-.`. No customer typing. 
+locals { + ravion_auto_enabled = ( + local.ravion_managed && + var.ravion_auto_subdomain && + var.service_given_id != null && + var.service_given_id != "" + ) +} + +resource "ravion_domain" "auto" { + count = local.ravion_auto_enabled ? 1 : 0 + + dns_provider_id = var.ravion_dns_provider_id + slug = var.service_given_id + parent_domain_allocation_id = var.ravion_parent_domain_allocation_id +} + # 1. Allocate one child FQDN per entry in var.ravion_domains. resource "ravion_domain" "this" { for_each = local.ravion_domain_set @@ -501,3 +522,107 @@ resource "aws_lb_listener_rule" "group" { "ravion:cert_group" = each.value.group_name }) } + +################################################################################ +# Auto-mode routing records + listener rule (zero-config URL) +# +# Same per-variant dispatch as the per-domain blocks above, but for +# the single auto-allocation. Count gating is 1 when auto-mode is on +# AND the matching provider variant resolves; 0 otherwise. +################################################################################ + +# Auto: ROUTE53_RAVION routing — Ravion's Route53 ALIAS, inline write. +resource "ravion_dns_records" "auto_ravion" { + count = local.ravion_auto_enabled && local.is_route53_ravion ? 1 : 0 + + managed_domain_id = ravion_domain.auto[0].id + records = [{ + name = ravion_domain.auto[0].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] +} + +# Auto: ROUTE53 (customer) routing — customer AWS write + Ravion metadata. +resource "aws_route53_record" "auto_r53" { + count = local.ravion_auto_enabled && local.is_route53 ? 
1 : 0 + + zone_id = local.dns_provider.route53.hosted_zone_id + name = ravion_domain.auto[0].fqdn + type = "A" + + alias { + name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + evaluate_target_health = true + } +} + +resource "ravion_dns_records" "auto_metadata_r53" { + count = local.ravion_auto_enabled && local.is_route53 ? 1 : 0 + + managed_domain_id = ravion_domain.auto[0].id + records = [{ + name = ravion_domain.auto[0].fqdn + type = "ALIAS" + value = jsonencode({ + dns_name = var.ravion_cluster_alb_dns_name + zone_id = var.ravion_cluster_alb_zone_id + }) + }] + depends_on = [aws_route53_record.auto_r53] +} + +# Auto: CLOUDFLARE routing — customer CF write + Ravion metadata. +resource "cloudflare_dns_record" "auto_cf" { + count = local.ravion_auto_enabled && local.is_cloudflare ? 1 : 0 + + zone_id = local.dns_provider.cloudflare.zone_id + name = ravion_domain.auto[0].fqdn + type = "CNAME" + content = var.ravion_cluster_alb_dns_name + ttl = 60 + proxied = false +} + +resource "ravion_dns_records" "auto_metadata_cf" { + count = local.ravion_auto_enabled && local.is_cloudflare ? 1 : 0 + + managed_domain_id = ravion_domain.auto[0].id + records = [{ + name = ravion_domain.auto[0].fqdn + type = "CNAME" + value = var.ravion_cluster_alb_dns_name + ttl = 60 + }] + depends_on = [cloudflare_dns_record.auto_cf] +} + +# Auto: host-header listener rule. Same priority space as the per- +# domain rules; seeded with "auto:" to avoid collision. +resource "aws_lb_listener_rule" "auto" { + count = local.ravion_auto_enabled && local.ravion_has_listener ? 
1 : 0 + + listener_arn = var.ravion_cluster_https_listener_arn + priority = (parseint(substr(sha256("auto:${var.name}"), 0, 4), 16) % 49000) + 1000 + + condition { + host_header { + values = [ravion_domain.auto[0].fqdn] + } + } + + action { + type = "forward" + target_group_arn = aws_lb_target_group.this[0].arn + } + + lifecycle { + ignore_changes = [action] + } + + tags = var.tags +} diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index d8e37d9..73630d0 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -624,6 +624,18 @@ variable "ravion_dns_provider_given_id" { default = null } +variable "ravion_auto_subdomain" { + type = bool + description = "Auto-mode: when true (and inherit_cluster_certificate is set on the caller's wiring), allocate one URL automatically with format `-.` — zero typing. When false, the caller's ravion_domains list (full FQDNs OR leaf labels under the cluster wildcard) is used instead." + default = true +} + +variable "service_given_id" { + type = string + description = "The service module-instance's given_id. Used by auto-mode as the slug for the auto-allocated URL. Injected by the Ravion runner when present; safe to leave null in standalone use." + default = null +} + variable "ravion_parent_domain_allocation_id" { type = string description = "Cluster's DomainAllocation id, from `module.ecs_cluster.ravion_cluster_domain_allocation_id`. When null/empty, no Ravion FQDN is allocated." From fce1f27a695b1e2398d2924a4d93c897c22c3f14 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 05:02:25 +0530 Subject: [PATCH 12/37] feat(ecs): address review feedback (Q3+Q4+Q7) Q3: service_given_id renamed to module_instance_given_id (matches the runner-injected TF_VAR_module_instance_given_id env var). Q4: cert groups drop the per-row use_ravion_subdomain toggle. 
Each group MUST specify its own customer-owned DNS provider; domains are full FQDNs posted via fqdn_override (validated under the provider's apex server-side). No fallback to the platform apex. Q7: cluster auto-mode local strips trailing dot from the resolved DnsProvider.domain_name so the literal FQDN written to state is canonical (no `.example.com.`). --- compute/ecs_cluster/locals.tf | 14 ++++++--- compute/ecs_service/.terraform.lock.hcl | 38 ++++++++++++------------- compute/ecs_service/ravion_domains.tf | 20 +++++++------ compute/ecs_service/variables.tf | 20 +++++++------ 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/compute/ecs_cluster/locals.tf b/compute/ecs_cluster/locals.tf index 48fa84d..119c51b 100644 --- a/compute/ecs_cluster/locals.tf +++ b/compute/ecs_cluster/locals.tf @@ -42,12 +42,18 @@ locals { # Auto-mode fqdnOverride: literal `.`. The # cluster's wildcard cert covers `*..` so - # services under it inherit via SNI. Falls back to a placeholder - # when module_instance_id is null (standalone use) — in that case - # auto-mode is effectively disabled and slug-mode kicks in. + # services under it inherit via SNI. Falls back to null when + # module_instance_id is unset (standalone use) — in that case auto- + # mode is effectively disabled and slug-mode kicks in. + # + # Apex trim — the DnsProvider's domain_name MAY arrive with a + # trailing dot (depends on how the row was seeded). Trim it so the + # resulting FQDN is canonical (no `.example.com..`) and the + # api-go under-apex check passes without depending on its + # forgiving-trailing-dot logic. cluster_auto_fqdn = ( var.use_ravion_subdomain && var.module_instance_id != null && var.module_instance_id != "" - ? format("%s.%s", var.module_instance_id, local.dns_provider != null ? local.dns_provider.domain_name : "") + ? format("%s.%s", var.module_instance_id, local.dns_provider != null ? 
trimsuffix(local.dns_provider.domain_name, ".") : "") : null ) diff --git a/compute/ecs_service/.terraform.lock.hcl b/compute/ecs_service/.terraform.lock.hcl index ef959ee..6d20816 100644 --- a/compute/ecs_service/.terraform.lock.hcl +++ b/compute/ecs_service/.terraform.lock.hcl @@ -2,10 +2,10 @@ # Manual edits may be lost in future updates. provider "ravion.com/ravion/domains" { - version = "2.0.0" + version = "2.1.0" constraints = ">= 2.0.0" hashes = [ - "h1:OLB6BBvlsZr1SwIdNtZ07ihAJFmjd/qVX8UzJ8xuZcw=", + "h1:Q13F9had825xi67cqiX8nNL1Zrm3BU51/n/dC5zRf3Q=", ] } @@ -27,24 +27,24 @@ provider "registry.opentofu.org/cloudflare/cloudflare" { } provider "registry.opentofu.org/hashicorp/aws" { - version = "6.39.0" + version = "6.46.0" constraints = ">= 6.0.0" hashes = [ - "h1:c9SG8ZdYgzqpxORpTqeLFeXW4qQQ8GMGCcUkU+FAfQM=", - "zh:00a6c0d8b5b86833087e367b632e9ab73fb8db9c43569020ebd0489dc2c919ce", - "zh:05f2b56211f4c8a0b66a093d025187cbc7be086dedef62306f5a28290598ebdc", - "zh:24d97a31d5ab814c33ed32a5b7674f1a15544b2367a95bddd00cfdd8d6b82740", - "zh:258194e24ac07ee194d580ca25a25fa7bc48fa40fed4fd58352b0a64da0da4c9", - "zh:315337e5f0ccafeadf490f117151b52c6d66244bf652f4fee975eddda662af3b", - "zh:38573dd56cca8c0ffe33396cf17cc8bd13de1d27d3c4da4177e485d174f1eaf0", - "zh:4baa806c5eb8faae95cea3f1dfafb153b5e3e96c5b30a2102072da4f032d2d9b", - "zh:4f258106baca7e00a6904b2353579d283e4400a75cd0353a25e057921e8a8d96", - "zh:62e5d4628d03883a6c2a6e3c297eb54df9b5935e9e3a655dbb1c6c5ddaf7ea33", - "zh:8af5fae01c1cef65d149fa6fe47e94cf46ffa97d29e8f2dfe41aeae01da590ea", - "zh:a8240b40f7be408ac24897597a85dc4fe56f390224b11ecad2c1327e686fca58", - "zh:c549eee2a0cf0e2c4a676614d990121b685beab0047b1073407ee26247c4be13", - "zh:cfed074ba8948c75445c74c69722cb17c960024b1917b4f26905aa9c9ac4e667", - "zh:d6f4f4fa01e33d0d546705e2776f38d0b4f2847827b3f07ecde87cc02ef3d23e", - "zh:e7239b349c3149e4670750481b687c5c828908fd09f2196d7af1ac1b4d83e80b", + "h1:VWSIs/K9tDs2X4Ej6fXHfiqlFubplb9ueRmTYmuqYh4=", + 
"zh:0a1dad9bd82743d63c1e65551e68948d33638dfaebb7f44bd89ec948e33ff975", + "zh:166dde870afc7d1fca056d99eb8af8a0729f54e84467e6e2fa7f0a3bea5982cf", + "zh:41bceb5f5f3aa832e07a4f077371f1309de1cf42621117d144227022d5ebc95c", + "zh:42fae38f09d1c23d8b9bdd12306d7740196837c150891f21517ada314ec40b35", + "zh:8d01f9b87083201fdfef4139068dc6540f9a0e3ac1e394b5c53195c2ac760b59", + "zh:9964ed013c0eb3cd8ff9b501a88e6a823b42655c4354a0f37e6f764bc51ab1fe", + "zh:a153cbb1cb57288955ba3db2e25368931afda529bab1afd5eda361fc368378e3", + "zh:a3bd14a333791ed26e60921d1c348ab5b9b5aae8fcfc590e40e9d741cdd9a9b4", + "zh:a4d32af825ba8f37d3c0c979a241a132a31332684b19060b0523b92c4dc5337f", + "zh:bc5ff7cf49be2c648d16c96449f0b02fda3e0d3af5e6820d625246a249ff31ee", + "zh:beacec832343dac5d4624d33eae80a44d58ef121c065b56ec220d6bbce2b5011", + "zh:c346a67110f3e17a65d4a46316f062b4af6b88d8fc3d5fb4092a6f0669719602", + "zh:cd0b4f44186a0eb2bd3cc59e4a55b5a363c9ac3303b23d0128171ff851f36336", + "zh:d524bff9f4b567ad19c3d3040ead81aa4108030238155d87a1bb2c797a7870e7", + "zh:ef23733dd0c7d40a4f3ccc761c4433a265324fa393aa4ae6015942b31a13d30e", ] } diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 2f4cb7a..4d733b3 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -58,8 +58,8 @@ locals { ravion_auto_enabled = ( local.ravion_managed && var.ravion_auto_subdomain && - var.service_given_id != null && - var.service_given_id != "" + var.module_instance_given_id != null && + var.module_instance_given_id != "" ) } @@ -67,7 +67,7 @@ resource "ravion_domain" "auto" { count = local.ravion_auto_enabled ? 
1 : 0 dns_provider_id = var.ravion_dns_provider_id - slug = var.service_given_id + slug = var.module_instance_given_id parent_domain_allocation_id = var.ravion_parent_domain_allocation_id } @@ -206,13 +206,15 @@ resource "aws_lb_listener_rule" "ravion" { locals { # Flatten (group, domain) into a single map for nested for_each - # against per-domain resources. The key is "<group>/<slug>" so two - # groups can use the same slug without collision. + # against per-domain resources. The key is "<group>/<fqdn>" so two + # groups can list the same fqdn without TF state collision. group_domain_pairs = merge([ for g in var.ravion_certificate_groups : { for d in g.domains : "${g.name}/${d}" => { group_name = g.name - slug = d + # Slug field name kept for backwards-compat with the per-row + # resource that consumed it; value is the full FQDN now. + slug = d } } ]...) @@ -235,12 +237,14 @@ locals { } } -# 1. Per-domain allocations under the group's provider. +# 1. Per-domain allocations under the group's provider. Each entry +# is a FULL FQDN posted via fqdn_override — api-go validates it +# lives under the resolved DnsProvider's apex. resource "ravion_domain" "group" { for_each = local.group_domain_pairs dns_provider_id = local.group_providers[each.value.group_name].id - slug = each.value.slug + fqdn_override = each.value.slug } # 2. ONE ACM cert per group (primary + SANs). Customer's AWS account, diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 73630d0..1bd8274 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -630,7 +630,7 @@ variable "ravion_auto_subdomain" { default = true } -variable "service_given_id" { +variable "module_instance_given_id" { type = string description = "The service module-instance's given_id. Used by auto-mode as the slug for the auto-allocated URL. Injected by the Ravion runner when present; safe to leave null in standalone use." 
default = null @@ -691,17 +691,19 @@ variable "ravion_certificate_groups" { # reuse across groups in the same service. name = string - # Per-group DnsProvider override. Either id or given_id wins - # (id first if both set). Leave both null to inherit from the - # service's top-level ravion_dns_provider_* vars. + # DnsProvider the group's domains live under. Required — each + # group MUST specify its own customer-owned provider (no fallback + # to the service's top-level provider). Either id or given_id + # wins (id first if both set). dns_provider_id = optional(string) dns_provider_given_id = optional(string) - # Domain slugs to cover with this cert. Each becomes a child - # ravion_domain allocation under the group's provider apex (FQDN - # derived as -.). Capped at 10 to match ACM's - # default cert SAN limit — increase only after raising the AWS - # account quota. + # Full FQDNs to cover with this cert. Each becomes a + # ravion_domain allocation posted with fqdn_override (used + # verbatim) under the group's provider apex. Each entry must + # end with the chosen provider's apex (validated server-side + # via PROVIDER_FQDN_NOT_UNDER_APEX). Capped at 10 to match + # ACM's default cert SAN limit. domains = list(string) })) description = "Per-service certificate groups. Each group issues ONE ACM cert covering up to 10 domains, validated via the group's DnsProvider variant, and attached as an SNI cert to the cluster's HTTPS listener. Use when service FQDNs need their own cert (multi-zone setups, non-wildcard apexes) instead of inheriting the cluster's wildcard. Groups are additive — ungrouped ravion_domains keep inheriting the cluster wildcard." 
From a1a2e698b5bb9b31e351d6298936c76c357f74a9 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 13:02:38 +0530 Subject: [PATCH 13/37] fix(ecs_cluster): correct ravion dns output name in module.yml Replace stale ravion_dns_zone_id with ravion_dns_provider_id to match the actual output defined in outputs.tf. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/module.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute/ecs_cluster/module.yml b/compute/ecs_cluster/module.yml index 262d5e1..f2bf24b 100644 --- a/compute/ecs_cluster/module.yml +++ b/compute/ecs_cluster/module.yml @@ -554,7 +554,7 @@ output: ravion_cluster_managed_domain_id: string ravion_cluster_fqdn: string ravion_cluster_certificate_arn: string - ravion_dns_zone_id: string + ravion_dns_provider_id: string # Private ALB private_alb_arn: string private_alb_id: string From 403a18b03697c11ab25c37aa7702674a7d250939 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 21 May 2026 13:04:02 +0530 Subject: [PATCH 14/37] fix(ecs_service): guard ravion_auto allocations against missing dns_provider_id Add lifecycle preconditions on ravion_auto_label and ravion_auto_auto domain resources so that a missing/empty ravion_dns_provider_id fails loudly at plan time instead of producing allocations with a null provider reference. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_service/ravion_domains.tf | 615 ++++++++++---------------- 1 file changed, 234 insertions(+), 381 deletions(-) diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 4d733b3..b879c36 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -1,173 +1,162 @@ ################################################################################ -# Ravion domain control plane — per-service domain allocations (V2) +# Ravion domain control plane — per-service certificate groups (V2) # -# When the parent cluster module is configured for Ravion-managed -# domains (`module.ecs_cluster.ravion_managed_domains_enabled`), each -# entry in var.ravion_domains gets: +# A single var.ravion_certificate_groups list drives EVERYTHING. Two kinds: # -# ravion_domain.this[d] — child allocation under -# the cluster -# _record.routing_ — actual CNAME write -# (Route53 / Cloudflare) -# ravion_dns_records.this[d] — metadata-only sibling -# (depends_on the real -# record write) -# aws_lb_listener_rule.ravion[d] — host-header rule on the -# cluster's HTTPS listener +# ravion_auto — Inherit cluster wildcard cert. `domains` is an +# optional list of DNS-safe leaf labels (e.g. `api`). Each label +# becomes a `