diff --git a/.github/workflows/validate-defaults.yaml b/.github/workflows/validate-defaults.yaml index e4fa7477..0f6935cf 100644 --- a/.github/workflows/validate-defaults.yaml +++ b/.github/workflows/validate-defaults.yaml @@ -26,9 +26,9 @@ jobs: with: yq-version: v4.30.7 - - name: Validate clusterGroupName is simple + - name: Validate clusterGroupName is azure run: | - if [ "$(yq '.main.clusterGroupName' values-global.yaml)" != "simple" ]; then - echo "main.clusterGroupName must be 'simple'" + if [ "$(yq '.main.clusterGroupName' values-global.yaml)" != "azure" ]; then + echo "main.clusterGroupName must be 'azure'" exit 1 fi diff --git a/AGENTS.md b/AGENTS.md index 6bad2fdc..34f5083f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -45,10 +45,10 @@ Use the **first** approach that fits your requirement: ├── rhdp/ # Red Hat Demo Platform tooling ├── scripts/ # Utility scripts ├── values-global.yaml # Global configuration -├── values-simple.yaml # Cluster group: simple +├── values-azure.yaml # Cluster group: azure ├── values-baremetal.yaml # Cluster group: baremetal ├── values-trusted-hub.yaml # Cluster group: trusted-hub -├── values-spoke.yaml # Cluster group: spoke +├── values-azure-spoke.yaml # Cluster group: azure-spoke └── values-secret.yaml.template # Secrets template (never commit filled-in copy) ``` @@ -59,8 +59,8 @@ These charts are published independently and consumed from the `charts.validated | Chart Name | Repository | Purpose | |---|---|---| | `trustee` | `validatedpatterns/trustee-chart` | Trustee / KBS configuration | -| `sandboxed-policies` | `validatedpatterns/sandboxed-policies-chart` | ACM policies hub → spoke | -| `sandboxed-containers` | `validatedpatterns/sandboxed-containers-chart` | Sandboxed runtime on spoke | +| `sandboxed-policies` | `validatedpatterns/sandboxed-policies-chart` | ACM policies hub → azure-spoke | +| `sandboxed-containers` | `validatedpatterns/sandboxed-containers-chart` | Sandboxed runtime on azure-spoke | Changes to companion 
charts require a release (Git tag) before the pattern can consume them. Update the `chartVersion:` field in the values files to pick up new releases. @@ -70,11 +70,10 @@ Set via `main.clusterGroupName` in `values-global.yaml`. | Cluster Group | Values File | Role | Description | |---|---|---|---| -| `simple` | `values-simple.yaml` | Hub (single cluster) | All components on one Azure cluster | -| `baremetal` | `values-baremetal.yaml` | Hub (single cluster) | TDX/SNP + LVM storage on bare metal | -| `baremetal-gpu` | `values-baremetal-gpu.yaml` | Hub (single cluster) | Bare metal + NVIDIA H100 GPU support | +| `azure` | `values-azure.yaml` | Hub (single cluster) | All components on one Azure cluster | +| `baremetal` | `values-baremetal.yaml` | Hub (single cluster) | Bare metal (hardware profile gated: intel-tdx, amd-snp, intel-tdx-gpu, amd-snp-gpu) | | `trusted-hub` | `values-trusted-hub.yaml` | Multi-cluster hub | Trustee + ACM policies | -| `spoke` | `values-spoke.yaml` | Multi-cluster spoke | Sandbox runtime + workloads | +| `azure-spoke` | `values-azure-spoke.yaml` | Multi-cluster spoke | Sandbox runtime + workloads (Azure) | ## Values File Hierarchy diff --git a/Makefile b/Makefile index 26a89e0e..39a6ef7b 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,25 @@ include Makefile-common +##@ GPG Key Management +.PHONY: cache-gpg-keys +cache-gpg-keys: ## Download and cache Red Hat GPG public keys to ~/.coco-pattern/ + @echo "Fetching Red Hat GPG public keys..." + @mkdir -p ~/.coco-pattern + @curl -fsSL https://access.redhat.com/security/data/fd431d51.txt -o ~/.coco-pattern/RPM-GPG-KEY-redhat-release + @echo "GPG key cached at ~/.coco-pattern/RPM-GPG-KEY-redhat-release" + @echo "Key fingerprint (verify this matches Red Hat official):" + @gpg --import-options show-only --import < ~/.coco-pattern/RPM-GPG-KEY-redhat-release 2>/dev/null | grep -A1 "^pub" || echo "Install gpg to verify fingerprint" + 
+.PHONY: cache-sigstore-keys +cache-sigstore-keys: ## Download and cache Red Hat sigstore public keys to ~/.coco-pattern/ + @echo "Fetching Red Hat sigstore public keys..." + @mkdir -p ~/.coco-pattern + @cp keys/SIGSTORE-redhat-release3 ~/.coco-pattern/SIGSTORE-redhat-release3 + @echo "Sigstore key cached at ~/.coco-pattern/SIGSTORE-redhat-release3" + @echo "Key fingerprint: E60D446E63405576" + + ##@ Reference Value Collection .PHONY: collect-firmware-refvals collect-firmware-refvals: ## Collect firmware reference values (bare metal, default) @@ -12,3 +31,37 @@ collect-firmware-refvals: ## Collect firmware reference values (bare metal, defa .PHONY: collect-azure-refvals collect-azure-refvals: ## Collect PCR reference values (Azure) @scripts/collect-firmware-refvals.sh --platform azure + +##@ Hardware Detection +.PHONY: detect-hardware +detect-hardware: ## Detect hardware profile from cluster nodes (requires KUBECONFIG or oc login) + @echo "Detecting hardware profile from cluster nodes..." 
+ @echo "---" + @CPU_VENDOR=$$(oc get nodes -o jsonpath='{.items[0].metadata.labels.feature\.node\.kubernetes\.io/cpu-model\.vendor_id}' 2>/dev/null) && \ + TDX_ENABLED=$$(oc get nodes -o jsonpath='{.items[0].metadata.labels.feature\.node\.kubernetes\.io/cpu-security\.tdx\.enabled}' 2>/dev/null) && \ + SNP_ENABLED=$$(oc get nodes -o jsonpath='{.items[0].metadata.labels.feature\.node\.kubernetes\.io/cpu-security\.sev\.snp\.enabled}' 2>/dev/null) && \ + GPU_PRESENT=$$(oc get nodes -o jsonpath='{.items[0].metadata.labels.nvidia\.com/gpu\.present}' 2>/dev/null) && \ + echo "CPU Vendor: $${CPU_VENDOR:-unknown}" && \ + echo "TDX Enabled: $${TDX_ENABLED:-false}" && \ + echo "SNP Enabled: $${SNP_ENABLED:-false}" && \ + echo "GPU Present: $${GPU_PRESENT:-false}" && \ + echo "---" && \ + if [ "$${CPU_VENDOR}" = "Intel" ] && [ "$${TDX_ENABLED}" = "true" ]; then \ + if [ "$${GPU_PRESENT}" = "true" ]; then \ + echo "Recommended profile: intel-tdx-gpu"; \ + else \ + echo "Recommended profile: intel-tdx"; \ + fi; \ + elif [ "$${CPU_VENDOR}" = "AMD" ] || [ "$${CPU_VENDOR}" = "AuthenticAMD" ] || [ "$${SNP_ENABLED}" = "true" ]; then \ + if [ "$${GPU_PRESENT}" = "true" ]; then \ + echo "Recommended profile: amd-snp-gpu"; \ + else \ + echo "Recommended profile: amd-snp"; \ + fi; \ + else \ + echo "Could not determine hardware profile."; \ + echo "Ensure NFD operator is running and node labels are populated."; \ + echo "Set global.hardware.profile manually in values-global.yaml"; \ + fi && \ + echo "" && \ + echo "To apply: edit values-global.yaml and set global.hardware.profile to the recommended value." diff --git a/README.md b/README.md index de7c94b6..8db8c9a1 100644 --- a/README.md +++ b/README.md @@ -8,17 +8,21 @@ Confidential containers use hardware-backed Trusted Execution Environments (TEEs The pattern provides four deployment topologies: -1. 
**Single cluster** (`simple` clusterGroup) — deploys all components (Trustee, Vault, ACM, sandboxed containers, workloads) in one cluster on Azure. 
This breaks the RACI separation expected in a remote attestation architecture but simplifies testing and demonstrations. +1. **Single cluster** (`azure` clusterGroup) — deploys all components (Trustee, Vault, ACM, sandboxed containers, workloads) in one cluster on Azure. This breaks the RACI separation expected in a remote attestation architecture but simplifies testing and demonstrations. -2. **Multi-cluster** (`trusted-hub` + `spoke` clusterGroups) — separates the trusted zone from the untrusted workload zone: +2. **Multi-cluster** (`trusted-hub` + `azure-spoke` clusterGroups) — separates the trusted zone from the untrusted workload zone: - **Hub** (`trusted-hub`): Runs Trustee (KBS + attestation service), HashiCorp Vault, ACM, and cert-manager. This cluster is the trust anchor. - - **Spoke** (`spoke`): Runs the sandboxed containers operator and confidential workloads. The spoke is imported into ACM and managed from the hub. + - **Spoke** (`azure-spoke`): Runs the sandboxed containers operator and confidential workloads. The spoke is imported into ACM and managed from the hub. 3. **Bare metal** (`baremetal` clusterGroup) — deploys all components on bare metal hardware with Intel TDX or AMD SEV-SNP support. NFD (Node Feature Discovery) auto-detects the CPU architecture and configures the appropriate runtime. Supports SNO (Single Node OpenShift) and multi-node clusters. -4. **Bare metal with GPU** (`baremetal-gpu` clusterGroup) — extends the bare metal topology with NVIDIA H100 confidential GPU support. Adds the NVIDIA GPU Operator, IOMMU kernel configuration, and a sample CUDA workload for CC GPU verification. Requires NVIDIA H100 GPUs with confidential computing firmware. 
+ Hardware-specific operators (GPU, Intel device plugins, DCAP) are controlled by `global.hardware.profile`: + - `intel-tdx` — Intel TDX without GPU + - `amd-snp` — AMD SEV-SNP without GPU + - `intel-tdx-gpu` — Intel TDX with NVIDIA H100 GPU + - `amd-snp-gpu` — AMD SEV-SNP with NVIDIA H100 GPU -The topology is controlled by the `main.clusterGroupName` field in `values-global.yaml`. +The topology is controlled by the `main.clusterGroupName` field in `values-global.yaml`. For bare metal deployments, also set `global.hardware.profile` to match your hardware configuration. Azure deployments use peer-pods, which provision confidential VMs (`Standard_DCas_v5` family) directly on the Azure hypervisor. Bare metal deployments use layered images and hardware TEE features directly. @@ -81,7 +85,7 @@ These scripts generate the cryptographic material and attestation reference valu ### Single cluster deployment (Azure) -1. Set `main.clusterGroupName: simple` in `values-global.yaml` +1. Set `main.clusterGroupName: azure` in `values-global.yaml` 2. Ensure your Azure configuration is populated in `values-global.yaml` (see `global.azure.*` fields) 3. `./pattern.sh make install` 4. Wait for the cluster to reboot all nodes (the sandboxed containers operator triggers a MachineConfig update). Monitor progress in the ArgoCD UI. @@ -92,17 +96,20 @@ These scripts generate the cryptographic material and attestation reference valu 2. Deploy the hub cluster: `./pattern.sh make install` 3. Wait for ACM (`MultiClusterHub`) to reach `Running` state on the hub 4. Provision a second OpenShift 4.19.28+ cluster on Azure for the spoke -5. Import the spoke into ACM with label `clusterGroup=spoke` +5. Import the spoke into ACM with label `clusterGroup=azure-spoke` (see [importing a cluster](https://validatedpatterns.io/learn/importing-a-cluster/)) -6. ACM will automatically deploy the `spoke` clusterGroup applications (sandboxed containers, workloads) to the imported cluster +6. 
ACM will automatically deploy the `azure-spoke` clusterGroup applications (sandboxed containers, workloads) to the imported cluster ### Bare metal deployment 1. Set `main.clusterGroupName: baremetal` in `values-global.yaml` -2. Run `bash scripts/gen-secrets.sh` to generate KBS keys and PCCS secrets -3. For Intel TDX: uncomment the PCCS secrets in `~/values-secret-coco-pattern.yaml` and provide your Intel PCS API key -4. `./pattern.sh make install` -5. Wait for the cluster to reboot nodes (MachineConfig updates for TDX kernel parameters and vsock) +2. Set `global.hardware.profile` to match your hardware (default: `intel-tdx`) + - Run `make detect-hardware` after NFD is deployed to detect your hardware profile automatically + - Options: `intel-tdx`, `amd-snp`, `intel-tdx-gpu`, `amd-snp-gpu` +3. Run `bash scripts/gen-secrets.sh` to generate KBS keys and PCCS secrets +4. For Intel TDX: uncomment the PCCS secrets in `~/values-secret-coco-pattern.yaml` and provide your Intel PCS API key +5. `./pattern.sh make install` +6. Wait for the cluster to reboot nodes (MachineConfig updates for TDX/SEV-SNP kernel parameters and vsock) > **Note:** Bare metal support is currently tested on SNO (Single Node OpenShift) configurations. Multi-node bare metal clusters are expected to work but have not been validated yet. @@ -117,20 +124,14 @@ The system auto-detects your hardware: Optional: pin PCCS to a specific node with `bash scripts/get-pccs-node.sh` and set `baremetal.pccs.nodeSelector` in the baremetal chart values. -### Bare metal GPU deployment - -1. Set `main.clusterGroupName: baremetal-gpu` in `values-global.yaml` -2. Run `bash scripts/gen-secrets.sh` to generate KBS keys and PCCS secrets -3. For Intel TDX: uncomment the PCCS secrets in `~/values-secret-coco-pattern.yaml` and provide your Intel PCS API key -4. `./pattern.sh make install` -5. Wait for the cluster to reboot nodes (MachineConfig updates for TDX/SEV-SNP kernel parameters, vsock, and IOMMU) -6. 
Approve the GPU Operator install plan when it appears (uses `installPlanApproval: Manual`) - -> **Note:** The `baremetal-gpu` topology deploys IOMMU MachineConfig on all nodes and will trigger reboots. For clusters without GPUs, use the `baremetal` topology instead. The GPU workload deployment will remain Pending on non-GPU systems but is otherwise harmless. +For GPU-enabled deployments (`intel-tdx-gpu` or `amd-snp-gpu` profiles): +- IOMMU MachineConfig is deployed on all nodes and will trigger reboots +- Approve the GPU Operator install plan when it appears (uses `installPlanApproval: Manual`) +- A sample CUDA workload (`gpu-workload`) is deployed for CC GPU verification ## Sample applications -Two sample applications are deployed on the cluster running confidential workloads (the single cluster in `simple` mode, or the spoke in multi-cluster mode): +Two sample applications are deployed on the cluster running confidential workloads (the single cluster in `azure` mode, or the spoke in multi-cluster mode): - **hello-openshift**: Three pods demonstrating CoCo security boundaries: - `standard` — a regular Kubernetes pod (no confidential computing) diff --git a/ansible/install-deps.yaml b/ansible/get-azure-deps.yaml similarity index 92% rename from ansible/install-deps.yaml rename to ansible/get-azure-deps.yaml index 8a927b90..7359cfe4 100644 --- a/ansible/install-deps.yaml +++ b/ansible/get-azure-deps.yaml @@ -1,4 +1,4 @@ -- name: Retrieve Credentials for AAP on OpenShift +- name: Install Azure collection dependencies become: false connection: local hosts: localhost diff --git a/ansible/init-data-gzipper.yaml b/ansible/init-data-gzipper.yaml index c9de0a5f..91920e3a 100644 --- a/ansible/init-data-gzipper.yaml +++ b/ansible/init-data-gzipper.yaml @@ -132,7 +132,7 @@ ansible.builtin.shell: | set -o pipefail initial_pcr=0000000000000000000000000000000000000000000000000000000000000000 - PCR8_HASH=$(echo -n "${initial_pcr}{{ raw_hash.stdout }}" | xxd -r -p | sha256sum | 
cut -d' ' -f1) && echo $PCR8_HASH + PCR8_HASH=$(echo -n "${initial_pcr}{{ raw_hash.stdout }}" | python3 -c "import sys,hashlib; print(hashlib.sha256(bytes.fromhex(sys.stdin.read())).hexdigest())") && echo $PCR8_HASH register: pcr8_hash changed_when: false @@ -140,7 +140,7 @@ ansible.builtin.shell: | set -o pipefail initial_pcr=0000000000000000000000000000000000000000000000000000000000000000 - PCR8_HASH=$(echo -n "${initial_pcr}{{ debug_raw_hash.stdout }}" | xxd -r -p | sha256sum | cut -d' ' -f1) && echo $PCR8_HASH + PCR8_HASH=$(echo -n "${initial_pcr}{{ debug_raw_hash.stdout }}" | python3 -c "import sys,hashlib; print(hashlib.sha256(bytes.fromhex(sys.stdin.read())).hexdigest())") && echo $PCR8_HASH register: debug_pcr8_hash changed_when: false diff --git a/ansible/initdata-debug.toml.tpl b/ansible/initdata-debug.toml.tpl index b49fae8f..a463c3b2 100644 --- a/ansible/initdata-debug.toml.tpl +++ b/ansible/initdata-debug.toml.tpl @@ -24,6 +24,7 @@ kbs_cert = """{{ trustee_cert }}""" [image] image_security_policy_uri = 'kbs:///default/security-policy/{{ security_policy_flavour }}' +authenticated_registry_credentials_uri = 'kbs:///default/credential/regcred' ''' "policy.rego" = ''' diff --git a/ansible/initdata-default.toml.tpl b/ansible/initdata-default.toml.tpl index 3fd1ecc3..daf579bf 100644 --- a/ansible/initdata-default.toml.tpl +++ b/ansible/initdata-default.toml.tpl @@ -24,6 +24,7 @@ kbs_cert = """{{ trustee_cert }}""" [image] image_security_policy_uri = 'kbs:///default/security-policy/{{ security_policy_flavour }}' +authenticated_registry_credentials_uri = 'kbs:///default/credential/regcred' ''' "policy.rego" = ''' diff --git a/charts/all/baremetal/templates/nfd-instance.yaml b/charts/all/baremetal/templates/nfd-instance.yaml index 97ce9ee1..ec967a1b 100644 --- a/charts/all/baremetal/templates/nfd-instance.yaml +++ b/charts/all/baremetal/templates/nfd-instance.yaml @@ -5,7 +5,7 @@ metadata: namespace: openshift-nfd spec: operand: - image: 
registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9:v4.20 + image: registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9:v{{ .Values.global.clusterVersion }} imagePullPolicy: Always servicePort: 12000 workerConfig: diff --git a/charts/all/intel-dcap/templates/pccs-secrets-eso.yaml b/charts/all/intel-dcap/templates/pccs-secrets-eso.yaml index 5ee91ab9..b8bb2f55 100644 --- a/charts/all/intel-dcap/templates/pccs-secrets-eso.yaml +++ b/charts/all/intel-dcap/templates/pccs-secrets-eso.yaml @@ -1,5 +1,5 @@ --- -apiVersion: "external-secrets.io/v1beta1" +apiVersion: "external-secrets.io/v1" kind: ExternalSecret metadata: name: pccs-secrets-eso diff --git a/charts/all/intel-dcap/templates/pccs-tls-eso.yaml b/charts/all/intel-dcap/templates/pccs-tls-eso.yaml index a7212ae1..0d82feb6 100644 --- a/charts/all/intel-dcap/templates/pccs-tls-eso.yaml +++ b/charts/all/intel-dcap/templates/pccs-tls-eso.yaml @@ -1,5 +1,5 @@ --- -apiVersion: "external-secrets.io/v1beta1" +apiVersion: "external-secrets.io/v1" kind: ExternalSecret metadata: name: pccs-tls-eso diff --git a/charts/coco-supported/gpu-workload/templates/gpu-vectoradd-deployment.yaml b/charts/coco-supported/gpu-workload/templates/gpu-vectoradd-deployment.yaml index d67faecd..dcc29602 100644 --- a/charts/coco-supported/gpu-workload/templates/gpu-vectoradd-deployment.yaml +++ b/charts/coco-supported/gpu-workload/templates/gpu-vectoradd-deployment.yaml @@ -32,6 +32,15 @@ spec: - | /opt/cuda-samples/Samples/0_Introduction/vectorAdd/build/vectorAdd sleep 36000 + volumeMounts: + - name: initdata + mountPath: /opt/confidential-containers/initdata + readOnly: true resources: limits: nvidia.com/pgpu: 1 + volumes: + - name: initdata + configMap: + name: debug-initdata + optional: false diff --git a/charts/coco-supported/hello-openshift/templates/insecure-policy-deployment.yaml b/charts/coco-supported/hello-openshift/templates/insecure-policy-deployment.yaml index 6770443e..0447bda6 100644 --- 
a/charts/coco-supported/hello-openshift/templates/insecure-policy-deployment.yaml +++ b/charts/coco-supported/hello-openshift/templates/insecure-policy-deployment.yaml @@ -22,6 +22,10 @@ spec: image: quay.io/openshift/origin-hello-openshift ports: - containerPort: 8888 + volumeMounts: + - name: initdata + mountPath: /opt/confidential-containers/initdata + readOnly: true securityContext: privileged: false allowPrivilegeEscalation: false @@ -31,3 +35,8 @@ spec: - ALL seccompProfile: type: RuntimeDefault + volumes: + - name: initdata + configMap: + name: debug-initdata + optional: false diff --git a/charts/coco-supported/hello-openshift/templates/secure-configmap.yaml b/charts/coco-supported/hello-openshift/templates/secure-configmap.yaml new file mode 100644 index 00000000..c840b224 --- /dev/null +++ b/charts/coco-supported/hello-openshift/templates/secure-configmap.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: secure-web-content + labels: + app: secure + annotations: + argocd.argoproj.io/sync-wave: "5" +data: + index.html: | + + + + Hello OpenShift + + +

Hello, OpenShift!

+

This confidential container is running on Red Hat httpd.

+ + diff --git a/charts/coco-supported/hello-openshift/templates/secure-deployment.yaml b/charts/coco-supported/hello-openshift/templates/secure-deployment.yaml index 4fa15564..ecc7b019 100644 --- a/charts/coco-supported/hello-openshift/templates/secure-deployment.yaml +++ b/charts/coco-supported/hello-openshift/templates/secure-deployment.yaml @@ -16,13 +16,20 @@ spec: annotations: peerpods: "true" coco.io/initdata-configmap: initdata + io.katacontainers.config.runtime.create_container_timeout: "900" spec: runtimeClassName: {{ include "hello-openshift.runtimeClassName" . }} containers: - name: hello-openshift - image: quay.io/openshift/origin-hello-openshift + image: registry.redhat.io/ubi9/httpd-24@sha256:68a91ff691092f455fea682330c499588747231c16516cd4f35aff821e6847f2 ports: - - containerPort: 8888 + - containerPort: 8080 + volumeMounts: + - name: web-content + mountPath: /var/www/html + - name: initdata + mountPath: /opt/confidential-containers/initdata + readOnly: true securityContext: privileged: false allowPrivilegeEscalation: false @@ -32,3 +39,11 @@ spec: - ALL seccompProfile: type: RuntimeDefault + volumes: + - name: web-content + configMap: + name: secure-web-content + - name: initdata + configMap: + name: initdata + optional: false diff --git a/charts/coco-supported/hello-openshift/templates/secure-svc.yaml b/charts/coco-supported/hello-openshift/templates/secure-svc.yaml index cff85a42..3658d9b2 100644 --- a/charts/coco-supported/hello-openshift/templates/secure-svc.yaml +++ b/charts/coco-supported/hello-openshift/templates/secure-svc.yaml @@ -4,10 +4,10 @@ metadata: name: secure spec: ports: - - name: 8888-tcp - port: 8888 + - name: 8080-tcp + port: 8080 protocol: TCP - targetPort: 8888 + targetPort: 8080 selector: app: secure sessionAffinity: None diff --git a/charts/coco-supported/kbs-access-curl/Chart.yaml b/charts/coco-supported/kbs-access-curl/Chart.yaml new file mode 100644 index 00000000..a6bdfb57 --- /dev/null +++ 
b/charts/coco-supported/kbs-access-curl/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +description: Demonstrates accessing keys within the KBS using httpd to serve the secret. +keywords: +- pattern +- confidential-containers +name: kbs-access-curl +version: 0.1.0 diff --git a/charts/coco-supported/kbs-access/templates/secure-deployment.yaml b/charts/coco-supported/kbs-access-curl/templates/deployment.yaml similarity index 50% rename from charts/coco-supported/kbs-access/templates/secure-deployment.yaml rename to charts/coco-supported/kbs-access-curl/templates/deployment.yaml index e53e874f..8922fcc0 100644 --- a/charts/coco-supported/kbs-access/templates/secure-deployment.yaml +++ b/charts/coco-supported/kbs-access-curl/templates/deployment.yaml @@ -1,44 +1,49 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: secure + name: kbs-access-curl labels: - app: secure + app: kbs-access-curl spec: replicas: 1 selector: matchLabels: - app: secure + app: kbs-access-curl template: metadata: labels: - app: secure + app: kbs-access-curl annotations: peerpods: "true" - coco.io/initdata-configmap: initdata + coco.io/initdata-configmap: debug-initdata {{- if .Values.defaultMemory }} io.katacontainers.config.hypervisor.default_memory: {{ .Values.defaultMemory | quote }} {{- end }} + io.katacontainers.config.runtime.create_container_timeout: "900" spec: runtimeClassName: {{ if .Values.runtimeClassName }}{{ .Values.runtimeClassName }}{{ else if or (eq .Values.global.clusterPlatform "Azure") (eq .Values.global.clusterPlatform "AWS") }}kata-remote{{ else }}kata-cc{{ end }} containers: - - name: python-access - image: ghcr.io/butler54/kbs-access-app:latest + - name: httpd + image: registry.redhat.io/ubi9/httpd-24@sha256:68a91ff691092f455fea682330c499588747231c16516cd4f35aff821e6847f2 ports: - - containerPort: 5000 + - containerPort: 8080 volumeMounts: - - name: output-volume - mountPath: /output - envFrom: - - configMapRef: - name: kbsref + - name: shared-volume + mountPath: 
/var/www/html + - name: initdata + mountPath: /opt/confidential-containers/initdata + readOnly: true initContainers: - name: curl image: registry.access.redhat.com/ubi9/ubi:latest - command: ['sh', '-c', 'curl -s http://127.0.0.1:8006/cdh/resource/default/kbsres1/key3 > /output/kbsres1.txt'] + command: ['sh', '-c', 'curl -sSf http://127.0.0.1:8006/cdh/resource/default/kbsres1/key3 > /var/www/html/secret.txt'] volumeMounts: - - name: output-volume - mountPath: /output + - name: shared-volume + mountPath: /var/www/html volumes: - - name: output-volume + - name: shared-volume emptyDir: {} + - name: initdata + configMap: + name: debug-initdata + optional: false diff --git a/charts/coco-supported/kbs-access/templates/secure-route.yaml b/charts/coco-supported/kbs-access-curl/templates/route.yaml similarity index 65% rename from charts/coco-supported/kbs-access/templates/secure-route.yaml rename to charts/coco-supported/kbs-access-curl/templates/route.yaml index dba755f2..37e7f37a 100644 --- a/charts/coco-supported/kbs-access-curl/templates/route.yaml +++ b/charts/coco-supported/kbs-access-curl/templates/route.yaml @@ -1,12 +1,12 @@ apiVersion: route.openshift.io/v1 kind: Route metadata: - name: secure + name: kbs-access-curl spec: port: - targetPort: 5000 + targetPort: 8080 to: kind: Service - name: secure + name: kbs-access-curl weight: 100 wildcardPolicy: None diff --git a/charts/coco-supported/kbs-access/templates/secure-svc.yaml b/charts/coco-supported/kbs-access-curl/templates/service.yaml similarity index 54% rename from charts/coco-supported/kbs-access/templates/secure-svc.yaml rename to charts/coco-supported/kbs-access-curl/templates/service.yaml index 618f2f41..1def982f 100644 --- a/charts/coco-supported/kbs-access/templates/secure-svc.yaml +++ b/charts/coco-supported/kbs-access-curl/templates/service.yaml @@ -1,14 +1,14 @@ apiVersion: v1 kind: Service metadata: - name: secure + name: kbs-access-curl spec: ports: - - name: 5000-tcp - port: 5000 + - name: 
8080-tcp + port: 8080 protocol: TCP - targetPort: 5000 + targetPort: 8080 selector: - app: secure + app: kbs-access-curl sessionAffinity: None type: ClusterIP diff --git a/charts/coco-supported/kbs-access/values.yaml b/charts/coco-supported/kbs-access-curl/values.yaml similarity index 100% rename from charts/coco-supported/kbs-access/values.yaml rename to charts/coco-supported/kbs-access-curl/values.yaml diff --git a/charts/coco-supported/kbs-access-sealed/Chart.yaml b/charts/coco-supported/kbs-access-sealed/Chart.yaml new file mode 100644 index 00000000..af4957df --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: kbs-access-sealed +description: Test workload that retrieves secrets from KBS and serves via httpd +type: application +version: 0.1.0 +appVersion: "1.0" diff --git a/charts/coco-supported/kbs-access-sealed/templates/deployment.yaml b/charts/coco-supported/kbs-access-sealed/templates/deployment.yaml new file mode 100644 index 00000000..7d38c6d7 --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/templates/deployment.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kbs-access-sealed + namespace: {{ .Values.global.namespace }} + labels: + app: kbs-access-sealed +spec: + replicas: 1 + selector: + matchLabels: + app: kbs-access-sealed + template: + metadata: + labels: + app: kbs-access-sealed + annotations: + coco.io/initdata-configmap: initdata + spec: + {{- if .Values.global.kata.enabled }} + runtimeClassName: kata-cc + {{- end }} + containers: + - name: httpd + image: registry.redhat.io/ubi9/httpd-24@sha256:68a91ff691092f455fea682330c499588747231c16516cd4f35aff821e6847f2 + ports: + - containerPort: 8080 + protocol: TCP + volumeMounts: + - name: secret-data + mountPath: /var/www/html + readOnly: true + - name: initdata + mountPath: /opt/confidential-containers/initdata + readOnly: true + resources: + limits: + memory: "256Mi" + cpu: "200m" + requests: + memory: 
"128Mi" + cpu: "100m" + volumes: + - name: secret-data + secret: + secretName: kbs-sealed-secret + - name: initdata + configMap: + name: initdata + optional: false diff --git a/charts/coco-supported/kbs-access-sealed/templates/route.yaml b/charts/coco-supported/kbs-access-sealed/templates/route.yaml new file mode 100644 index 00000000..a6c23ec9 --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/templates/route.yaml @@ -0,0 +1,20 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: kbs-access-sealed + namespace: {{ .Values.global.namespace }} + annotations: + argocd.argoproj.io/sync-wave: "10" + labels: + app: kbs-access-sealed +spec: + to: + kind: Service + name: kbs-access-sealed + weight: 100 + port: + targetPort: http + tls: + termination: edge + insecureEdgeTerminationPolicy: Redirect + wildcardPolicy: None diff --git a/charts/coco-supported/kbs-access-sealed/templates/sealed-secret.yaml b/charts/coco-supported/kbs-access-sealed/templates/sealed-secret.yaml new file mode 100644 index 00000000..949df91d --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/templates/sealed-secret.yaml @@ -0,0 +1,30 @@ +# NOTE: This is a placeholder Kubernetes Secret that contains a KBS resource pointer. +# +# In production, this secret should be created using coco-tools to generate the +# sealed secret resource identifier that points to the actual secret stored in KBS. +# +# The KBS resource pointer format is typically: +# kbs:/// +# +# Example workflow: +# 1. Store secret in KBS: coco-tools kbs add-secret +# 2. Generate resource pointer: coco-tools kbs get-pointer +# 3. Update this secret with the pointer (base64 encoded) +# +# For testing purposes, this contains a literal secret value. +# Replace with KBS pointer in production deployments. 
+ +apiVersion: v1 +kind: Secret +metadata: + name: kbs-sealed-secret + namespace: {{ .Values.global.namespace }} + annotations: + argocd.argoproj.io/sync-wave: "5" + labels: + app: kbs-access-sealed +type: Opaque +data: + # TODO: Replace with KBS resource pointer generated via coco-tools + # Example: secret-key: a2JzOi8va2JzLXNlcnZpY2Uua2JzLXN5c3RlbS9zZWNyZXRzL215LXNlY3JldA== + secret-key: VGhpcyBpcyBhIHRlc3Qgc2VjcmV0IGZyb20gS0JT diff --git a/charts/coco-supported/kbs-access-sealed/templates/service.yaml b/charts/coco-supported/kbs-access-sealed/templates/service.yaml new file mode 100644 index 00000000..d02ad3ee --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: kbs-access-sealed + namespace: {{ .Values.global.namespace }} + annotations: + argocd.argoproj.io/sync-wave: "10" + labels: + app: kbs-access-sealed +spec: + selector: + app: kbs-access-sealed + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP + type: ClusterIP diff --git a/charts/coco-supported/kbs-access-sealed/values.yaml b/charts/coco-supported/kbs-access-sealed/values.yaml new file mode 100644 index 00000000..a592f64e --- /dev/null +++ b/charts/coco-supported/kbs-access-sealed/values.yaml @@ -0,0 +1,4 @@ +# Default values for kbs-access-sealed chart +# Override via values-global.yaml or values-baremetal.yaml + +# Empty defaults - all configuration handled by global values diff --git a/charts/coco-supported/kbs-access/Chart.yaml b/charts/coco-supported/kbs-access/Chart.yaml deleted file mode 100644 index 0c784c2a..00000000 --- a/charts/coco-supported/kbs-access/Chart.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v2 -description: Demonstrates accessing keys within the KBS. 
-keywords: -- pattern -name: kbs-access -version: 0.0.1 diff --git a/charts/coco-supported/kbs-access/README.md b/charts/coco-supported/kbs-access/README.md deleted file mode 100644 index d8e52b7e..00000000 --- a/charts/coco-supported/kbs-access/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Notes use of external image - -This chart currently uses an [image hosted on ghcr.io](ghcr.io/butler54/kbs-access-app:latest) built from the [following repository](https://github.com/butler54/coco-kbs-access). - -Using separate repository for build rather than integrated content is discouraged by validated patterns. - -The separate repository is because Coco (via the Kata guest components) must be served by an image registry using a TLS connection with a well known CA (as of today). - -This chart will be updated as that position changes. diff --git a/charts/coco-supported/kbs-access/templates/environment.yaml b/charts/coco-supported/kbs-access/templates/environment.yaml deleted file mode 100644 index 3f5bc49e..00000000 --- a/charts/coco-supported/kbs-access/templates/environment.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: kbsref - namespace: kbs-access -data: - FILEPATH: "/output/kbsres1.txt" \ No newline at end of file diff --git a/docs/CONTAINER-SIGNING-BLOCKER.md b/docs/CONTAINER-SIGNING-BLOCKER.md new file mode 100644 index 00000000..d48d2c1b --- /dev/null +++ b/docs/CONTAINER-SIGNING-BLOCKER.md @@ -0,0 +1,266 @@ +# Container Signing Policy Enforcement - Upstream Blocker + +**Status**: ⛔ BLOCKED - Waiting for upstream fix +**Issue**: Red Hat container image signature verification fails with sigstore policies +**Upstream PR**: https://github.com/confidential-containers/guest-components/pull/1398 + +## Summary + +Container image signature verification for Red Hat images (registry.redhat.io, registry.access.redhat.com) is currently **not working** in Confidential Containers due to a bug in the `image-rs` sigstore implementation. 
+ +**Current Configuration**: `securityPolicyFlavour: "insecure"` (no signature verification) +**Target Configuration**: `securityPolicyFlavour: "redhat-secure-sigstore"` (blocked) + +## Root Cause + +**Bug**: image-rs does not base64-decode the `keyData` field for cosign/sigstore signatures. + +**Location**: `image-rs/src/signature/policy/cosign/mod.rs` line 69 + +**Current code**: +```rust +(Some(key_data), None) => key_data.as_bytes().to_vec(), // ❌ Wrong: treats base64 string as raw bytes +``` + +**Expected code**: +```rust +(Some(key_data), None) => { + use base64::Engine; + base64::engine::general_purpose::STANDARD.decode(key_data)? // ✅ Correct: decode base64 first +} +``` + +**Impact**: The cryptographic verifier receives base64-encoded text instead of decoded PEM key bytes, causing all signature verification attempts to fail with "rejected by sigstoreSigned rule". + +## Evidence + +### Podman Works (Golang containers/image) + +```bash +# Test on RHEL 10.2 with identical policy +podman pull --signature-policy=policy.json \ + registry.redhat.io/ubi9/httpd-24:latest + +# Result: ✅ SUCCESS +# - Signature found in registry +# - Verification passed +# - Image pulled successfully +``` + +### image-rs Fails (Rust rewrite) + +```bash +# CoCo pod with identical policy +Image: registry.redhat.io/ubi9/httpd-24@sha256:68a91ff... +Policy: kbs:///default/security-policy/redhat-secure-sigstore +Key: Embedded base64-encoded PEM public key + +# Result: ❌ FAIL +# Error: Image policy rejected: Denied by policy: rejected by `sigstoreSigned` rule +# Pod status: CreateContainerError +``` + +**The error message proves repository matching works** - if matching failed, the error would be "no matching policy" not "rejected by sigstoreSigned rule". 
+ +## Upstream Fix Status + +**PR**: https://github.com/confidential-containers/guest-components/pull/1398 +**Repository**: confidential-containers/guest-components +**Component**: image-rs (used by attestation-agent in kata guest VMs) + +**Required for**: +- Red Hat build of trustee-operator +- OpenShift Sandboxed Containers +- Confidential Containers on OpenShift + +**Waiting on**: +1. PR merge to guest-components +2. Release of updated guest-components version +3. Integration into Red Hat build of trustee +4. Update of kata guest image with fixed image-rs + +## Infrastructure Ready for Future Enablement + +All required infrastructure is **already deployed** and tested: + +### ✅ Deployed Components + +1. **Sigstore public key** + - Source: `/etc/pki/sigstore/SIGSTORE-redhat-release3` from RHEL 10.2 + - Key ID: `4096R/E60D446E63405576` (issued 2024-09-20) + - Location: `coco-pattern/keys/SIGSTORE-redhat-release3` + +2. **KBS secret** + - Secret: `sigstore-keys` in `trustee-operator-system` namespace + - Field: `redhat-release3` + - KBS URI: `kbs:///default/sigstore-keys/redhat-release3` + +3. **Policy template** + - Policy: `redhat-secure-sigstore` in `values-secret.yaml.template` + - Type: `sigstoreSigned` with embedded `keyData` + - Registries: `registry.redhat.io`, `registry.access.redhat.com` + +4. 
**Makefile targets** + - `make cache-sigstore-keys` - Cache key to `~/.coco-pattern/` + +### ✅ Verified Working + +**Podman verification successful** (2026-07-03): +- Platform: RHEL 10.2 jump host +- Image: registry.access.redhat.com/ubi9/ubi-minimal:latest +- Policy: sigstoreSigned with Red Hat sigstore key +- Result: Signature verification passed ✅ + +## Policy Configuration + +### Current (Insecure) + +```yaml +# values-global.yaml +global: + coco: + securityPolicyFlavour: "insecure" # ⚠️ No signature verification +``` + +### Target (When Fix Lands) + +```yaml +# values-global.yaml +global: + coco: + securityPolicyFlavour: "redhat-secure-sigstore" # ✅ Sigstore verification +``` + +### Policy Details + +```json +{ + "default": [{"type": "insecureAcceptAnything"}], + "transports": { + "docker": { + "registry.redhat.io": [ + { + "type": "sigstoreSigned", + "keyData": "YXJ0aWZhY3RzIHRoYXQgYXJlIHNpZ3N0b3JlLWVuYWJsZWQuCi0tLS0tQkVHSU4gUFVCTElDIEtFWS0tLS0tCk1JSUNJakFOQmdrcWhraUc5dzBCQVFFRkFBT0NBZzhBTUlJQ0NnS0NBZ0VBMEFTeXVIMlRMV3ZCVXFQSFo0SXAKNzVnN0VuY0JrZ1FIZEpuanp4QVc1S1FUTWgvc2lCb0IvQm9TcnRpUE13bkNoYlRDblFPSVFlWnVEaUZuaHVKNwpNL0QzYjdKb1gwbTEyM05jQ1NuNjdtQWRqQmE2Qmc2a3VrWmdDUDRaVVplRVNhaldYL0VqeWxGY1JGT1hXNTdwClJEQ0VONDJKL2pZbFZxdCtnOStHcmtlcjhTejg2SDNsMHRicU9kamJ6L1Z4SFlod0YwY3RVTUhzeVZSRHEyUVAKdHF6TlhsbWxNaFMvUG9GcjZSNHUvN0hDbi9LK0xlZ2NPMmZBRk9iNDBLdktTS0tWRDZsZXdVWkVyaG9wMUNnSgpYakR0R21tTzlkR01GNzFtZjZIRWZhS1NkeStFRTZpU0YyQTJWdjlRaEJhd01pcTJrT3pFaUxnNG5BZEpUOHdnClpyTUFtUENxR0lzWE5HWjQvUStZVHd3bGNlM2dscWI1TDl0Zk5vekVkU1I5Tjg1REVTZlFMUUVkWTNDYWx3S00KQlQxT0VoRVgxd0hSQ1U0ZHJNT2VqNkJOVzBWdHNjR3RIbUNyczc0alBlemh3TlQ4eXBreVMrVDB6VDRUc3k2ZgpWWGtKOFlTSHllblN6TUIyT3AyYnZzRTNnclkrczc0V2hHOVVJQTZEQnhjVGllMTVOU3pLd2Z6YW9OV09EY0xGCnA3Qlk4YWFIRTJNcUZ4WUZYK0lianBrUVJmYWVRUXNvdURGZENrWEVGVmZQcGJEMmRrNkZsZWFNVFB1eXh0SVQKZ2pWRXRHUUsycUdDRkdpUUhGZDRoZlYrZUNBNjNKcm8xejB6b0JNNUJiSUlRMytlVkZ3dDNBbFpwNVVWd3I2ZApzZWNxa2kveXJtdjNZMGRxWjlWT24zVUNBd0VBQVE9PQotLS0tLUVORCBQVUJMSUMgS0VZLS0tLS0K", 
+ "signedIdentity": {"type": "matchRepository"} + } + ], + "registry.access.redhat.com": [ + { + "type": "sigstoreSigned", + "keyData": "YXJ0aWZhY3RzIHRoYXQgYXJlIHNpZ3N0b3JlLWVuYWJsZWQuCi0tLS0tQkVHSU4gUFVCTElDIEtFWS0tLS0tCk1JSUNJakFOQmdrcWhraUc5dzBCQVFFRkFBT0NBZzhBTUlJQ0NnS0NBZ0VBMEFTeXVIMlRMV3ZCVXFQSFo0SXAKNzVnN0VuY0JrZ1FIZEpuanp4QVc1S1FUTWgvc2lCb0IvQm9TcnRpUE13bkNoYlRDblFPSVFlWnVEaUZuaHVKNwpNL0QzYjdKb1gwbTEyM05jQ1NuNjdtQWRqQmE2Qmc2a3VrWmdDUDRaVVplRVNhaldYL0VqeWxGY1JGT1hXNTdwClJEQ0VONDJKL2pZbFZxdCtnOStHcmtlcjhTejg2SDNsMHRicU9kamJ6L1Z4SFlod0YwY3RVTUhzeVZSRHEyUVAKdHF6TlhsbWxNaFMvUG9GcjZSNHUvN0hDbi9LK0xlZ2NPMmZBRk9iNDBLdktTS0tWRDZsZXdVWkVyaG9wMUNnSgpYakR0R21tTzlkR01GNzFtZjZIRWZhS1NkeStFRTZpU0YyQTJWdjlRaEJhd01pcTJrT3pFaUxnNG5BZEpUOHdnClpyTUFtUENxR0lzWE5HWjQvUStZVHd3bGNlM2dscWI1TDl0Zk5vekVkU1I5Tjg1REVTZlFMUUVkWTNDYWx3S00KQlQxT0VoRVgxd0hSQ1U0ZHJNT2VqNkJOVzBWdHNjR3RIbUNyczc0alBlemh3TlQ4eXBreVMrVDB6VDRUc3k2ZgpWWGtKOFlTSHllblN6TUIyT3AyYnZzRTNnclkrczc0V2hHOVVJQTZEQnhjVGllMTVOU3pLd2Z6YW9OV09EY0xGCnA3Qlk4YWFIRTJNcUZ4WUZYK0lianBrUVJmYWVRUXNvdURGZENrWEVGVmZQcGJEMmRrNkZsZWFNVFB1eXh0SVQKZ2pWRXRHUUsycUdDRkdpUUhGZDRoZlYrZUNBNjNKcm8xejB6b0JNNUJiSUlRMytlVkZ3dDNBbFpwNVVWd3I2ZApzZWNxa2kveXJtdjNZMGRxWjlWT24zVUNBd0VBQVE9PQotLS0tLUVORCBQVUJMSUMgS0VZLS0tLS0K", + "signedIdentity": {"type": "matchRepository"} + } + ] + } + } +} +``` + +The `keyData` field contains the Red Hat sigstore public key (release key 3) base64-encoded. + +## Why GPG Signatures Don't Work Either + +Red Hat dual-signs all container images: +1. **GPG signatures** - Stored on separate HTTPS lookaside servers +2. **Sigstore signatures** - Stored as OCI artifacts in the registry + +**GPG approach blocked**: image-rs does not support HTTP/HTTPS for fetching signatures from lookaside servers (tracked in confidential-containers/image-rs#9). + +**Sigstore approach blocked**: This base64-decode bug. + +## Re-Enabling Signature Verification + +When the upstream fix is available: + +### 1. 
Verify Fix is Available +```bash +# Check guest-components release notes for the fix +# Confirm Red Hat trustee-operator includes updated image-rs +``` + +### 2. Update Configuration +```bash +cd ~/coco-pattern +git pull origin dev/phase1-modernization + +# Edit values-global.yaml +# Change: securityPolicyFlavour: "insecure" +# To: securityPolicyFlavour: "redhat-secure-sigstore" +``` + +### 3. Deploy Updated Pattern +```bash +export KUBECONFIG=~/node-02-output/421_build/auth/kubeconfig +./pattern.sh make install +``` + +### 4. Verify Signature Enforcement +```bash +# Delete confidential pod to force recreation +oc delete pod -n hello-openshift -l app=secure + +# Check pod starts successfully +oc get pods -n hello-openshift -l app=secure + +# Verify policy is active +oc get pod -n hello-openshift -l app=secure -o yaml | grep -A 5 init_data + +# Should show: image_security_policy_uri = 'kbs:///default/security-policy/redhat-secure-sigstore' +``` + +### 5. Test with Unsigned Image +```bash +# Create test deployment with unsigned custom image +oc apply -n hello-openshift -f - < + +-----BEGIN PUBLIC KEY----- +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA0ASyuH2TLWvBUqPHZ4Ip +75g7EncBkgQHdJnjzxAW5KQTMh/siBoB/BoSrtiPMwnChbTCnQOIQeZuDiFnhuJ7 +M/D3b7JoX0m123NcCSn67mAdjBa6Bg6kukZgCP4ZUZeESajWX/EjylFcRFOXW57p +RDCEN42J/jYlVqt+g9+Grker8Sz86H3l0tbqOdjbz/VxHYhwF0ctUMHsyVRDq2QP +tqzNXlmlMhS/PoFr6R4u/7HCn/K+LegcO2fAFOb40KvKSKKVD6lewUZErhop1CgJ +XjDtGmmO9dGMF71mf6HEfaKSdy+EE6iSF2A2Vv9QhBawMiq2kOzEiLg4nAdJT8wg +ZrMAmPCqGIsXNGZ4/Q+YTwwlce3glqb5L9tfNozEdSR9N85DESfQLQEdY3CalwKM +BT1OEhEX1wHRCU4drMOej6BNW0VtscGtHmCrs74jPezhwNT8ypkyS+T0zT4Tsy6f +VXkJ8YSHyenSzMB2Op2bvsE3grY+s74WhG9UIA6DBxcTie15NSzKwfzaoNWODcLF +p7BY8aaHE2MqFxYFX+IbjpkQRfaeQQsouDFdCkXEFVfPpbD2dk6FleaMTPuyxtIT +gjVEtGQK2qGCFGiQHFd4hfV+eCA63Jro1z0zoBM5BbIIQ3+eVFwt3AlZp5UVwr6d +secqki/yrmv3Y0dqZ9VOn3UCAwEAAQ== +-----END PUBLIC KEY----- diff --git a/overrides/values-4.19.yaml b/overrides/values-4.19.yaml new file mode 100644 
index 00000000..c7ef3aba --- /dev/null +++ b/overrides/values-4.19.yaml @@ -0,0 +1,7 @@ +# OCP 4.19 version-specific operator pins +# Applied via sharedValueFiles when global.clusterVersion is "4.19" + +clusterGroup: + subscriptions: + lvm-operator: + channel: stable-4.19 diff --git a/overrides/values-4.20.yaml b/overrides/values-4.20.yaml new file mode 100644 index 00000000..d7a8c0a0 --- /dev/null +++ b/overrides/values-4.20.yaml @@ -0,0 +1,7 @@ +# OCP 4.20 version-specific operator pins +# Applied via sharedValueFiles when global.clusterVersion is "4.20" + +clusterGroup: + subscriptions: + lvm-operator: + channel: stable-4.20 diff --git a/overrides/values-4.21.yaml b/overrides/values-4.21.yaml new file mode 100644 index 00000000..a9b3948d --- /dev/null +++ b/overrides/values-4.21.yaml @@ -0,0 +1,7 @@ +# OCP 4.21 version-specific operator pins +# Applied via sharedValueFiles when global.clusterVersion is "4.21" + +clusterGroup: + subscriptions: + lvm-operator: + channel: stable-4.21 diff --git a/overrides/values-hw-amd-snp-gpu.yaml b/overrides/values-hw-amd-snp-gpu.yaml new file mode 100644 index 00000000..52289637 --- /dev/null +++ b/overrides/values-hw-amd-snp-gpu.yaml @@ -0,0 +1,13 @@ +# Hardware profile: AMD SEV-SNP + NVIDIA GPU +# Disables Intel device plugins and Intel DCAP (PCCS/QGS). +# GPU operator and NVIDIA GPU applications remain enabled. +# AMD SEV-SNP does not require Intel-specific attestation infrastructure. + +clusterGroup: + subscriptions: + intel-device-plugins: + disabled: true + + applications: + intel-dcap: + disabled: true diff --git a/overrides/values-hw-amd-snp.yaml b/overrides/values-hw-amd-snp.yaml new file mode 100644 index 00000000..e5bfc2c8 --- /dev/null +++ b/overrides/values-hw-amd-snp.yaml @@ -0,0 +1,19 @@ +# Hardware profile: AMD SEV-SNP (no GPU) +# Disables Intel device plugins, Intel DCAP (PCCS/QGS), and GPU components. +# AMD SEV-SNP does not require Intel-specific attestation infrastructure. 
+# NFD remains enabled — it detects AMD SEV capabilities via cpu-security labels. + +clusterGroup: + subscriptions: + intel-device-plugins: + disabled: true + gpu-operator: + disabled: true + + applications: + intel-dcap: + disabled: true + nvidia-gpu: + disabled: true + gpu-workload: + disabled: true diff --git a/overrides/values-hw-intel-tdx-gpu.yaml b/overrides/values-hw-intel-tdx-gpu.yaml new file mode 100644 index 00000000..903c10ee --- /dev/null +++ b/overrides/values-hw-intel-tdx-gpu.yaml @@ -0,0 +1,6 @@ +# Hardware profile: Intel TDX + NVIDIA GPU +# All components enabled — Intel device plugins, PCCS/QGS, GPU operator, NVIDIA GPU apps. +# This is the "everything on" profile for Intel platforms with NVIDIA H100 GPUs. + +# No overrides needed — all subscriptions and applications default to enabled. +# This file exists so the sharedValueFiles template resolves without error. diff --git a/overrides/values-hw-intel-tdx.yaml b/overrides/values-hw-intel-tdx.yaml new file mode 100644 index 00000000..09d2839e --- /dev/null +++ b/overrides/values-hw-intel-tdx.yaml @@ -0,0 +1,14 @@ +# Hardware profile: Intel TDX (no GPU) +# Disables GPU operator and NVIDIA GPU applications. +# Intel device plugins, PCCS/QGS (intel-dcap) remain enabled (default). + +clusterGroup: + subscriptions: + gpu-operator: + disabled: true + + applications: + nvidia-gpu: + disabled: true + gpu-workload: + disabled: true diff --git a/values-spoke.yaml b/values-azure-spoke.yaml similarity index 61% rename from values-spoke.yaml rename to values-azure-spoke.yaml index 0dab1011..b43c2b37 100644 --- a/values-spoke.yaml +++ b/values-azure-spoke.yaml @@ -1,16 +1,26 @@ -# This is currently configured as an 'all in one' deployment in one cluster. +# Azure spoke topology for confidential containers (multi-cluster). +# Runs sandboxed containers operator and confidential workloads. +# Paired with trusted-hub topology — spoke is imported into ACM from the hub. 
+# Set main.clusterGroupName: azure-spoke in values-global.yaml on the spoke cluster. clusterGroup: - name: spoke + name: azure-spoke isHubCluster: false namespaces: - - golang-external-secrets + - external-secrets-operator: + operatorGroup: true + targetNamespaces: [] + - external-secrets - openshift-sandboxed-containers-operator - hello-openshift - kbs-access - cert-manager-operator subscriptions: - # ACM is kept anticipating + # ACM is kept anticipating + eso: + name: openshift-external-secrets-operator + namespace: external-secrets-operator + channel: stable-v1 sandbox: name: sandboxed-containers-operator namespace: openshift-sandboxed-containers-operator @@ -27,7 +37,7 @@ clusterGroup: - hub - vault - trustee - - golang-external-secrets + - external-secrets - sandbox - workloads - default @@ -35,21 +45,24 @@ clusterGroup: # We can use self-referential variables because the chart calls the tpl function with these variables defined sharedValueFiles: - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' + - '/overrides/values-{{ $.Values.global.clusterVersion }}.yaml' applications: secrets-operator: - name: golang-external-secrets - namespace: golang-external-secrets - project: golang-external-secrets - chart: golang-external-secrets - chartVersion: 0.1.* + name: openshift-external-secrets + namespace: external-secrets + project: external-secrets + chart: openshift-external-secrets + chartVersion: 0.0.* sandbox: name: sandbox namespace: openshift-sandboxed-containers-operator #upstream config project: sandbox - chart: sandboxed-containers - chartVersion: 0.2.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-containers-chart.git + targetRevision: dev/phase1-testing + path: . 
overrides: - name: global.secretStore.backend value: vault @@ -71,10 +84,9 @@ clusterGroup: path: charts/coco-supported/kbs-access imperative: - image: ghcr.io/butler54/imperative-container:latest jobs: - - name: install-deps - playbook: ansible/install-deps.yaml + - name: get-azure-deps + playbook: ansible/get-azure-deps.yaml verbosity: -vvv timeout: 3600 - name: configure-azure-nat-gateway diff --git a/values-simple.yaml b/values-azure.yaml similarity index 52% rename from values-simple.yaml rename to values-azure.yaml index 723746b7..8a16c5d9 100644 --- a/values-simple.yaml +++ b/values-azure.yaml @@ -1,14 +1,19 @@ -# This is currently configured as an 'all in one' deployment in one cluster. +# Azure single-cluster topology for confidential containers. +# Deploys all components (Trustee, Vault, ACM, sandboxed containers, workloads) in one cluster. +# Set main.clusterGroupName: azure in values-global.yaml to use. clusterGroup: - name: simple + name: azure isHubCluster: true # Override health check for Subscriptions to treat UpgradePending as healthy # Only applies to pinned CSV subscriptions (sandbox and trustee) namespaces: - open-cluster-management - vault - - golang-external-secrets + - external-secrets-operator: + operatorGroup: true + targetNamespaces: [] + - external-secrets - openshift-sandboxed-containers-operator - trustee-operator-system - hello-openshift @@ -18,10 +23,14 @@ clusterGroup: - encrypted-storage - kyverno subscriptions: - # ACM is kept anticipating + # ACM is kept anticipating acm: name: advanced-cluster-management namespace: open-cluster-management + eso: + name: openshift-external-secrets-operator + namespace: external-secrets-operator + channel: stable-v1 sandbox: name: sandboxed-containers-operator namespace: openshift-sandboxed-containers-operator @@ -45,7 +54,7 @@ clusterGroup: - hub - vault - trustee - - golang-external-secrets + - external-secrets - sandbox - workloads - default @@ -53,6 +62,7 @@ clusterGroup: # We can use 
self-referential variables because the chart calls the tpl function with these variables defined sharedValueFiles: - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' + - '/overrides/values-{{ $.Values.global.clusterVersion }}.yaml' applications: acm: name: acm @@ -69,30 +79,39 @@ clusterGroup: chartVersion: 0.1.* secrets-operator: - name: golang-external-secrets - namespace: golang-external-secrets - project: golang-external-secrets - chart: golang-external-secrets - chartVersion: 0.1.* + name: openshift-external-secrets + namespace: external-secrets + project: external-secrets + chart: openshift-external-secrets + chartVersion: 0.0.* trustee: name: trustee namespace: trustee-operator-system #upstream config project: trustee - chart: trustee - chartVersion: 0.7.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/trustee-chart.git + targetRevision: dev/phase1-testing + path: . extraValueFiles: - '/overrides/values-trustee.yaml' + overrides: + - name: global.coco.secured + value: "true" sandbox: name: sandbox namespace: openshift-sandboxed-containers-operator #upstream config project: sandbox - chart: sandboxed-containers - chartVersion: 0.2.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-containers-chart.git + targetRevision: dev/phase1-testing + path: . sandbox-policies: name: sandbox-policies namespace: openshift-sandboxed-containers-operator #upstream config - chart: sandboxed-policies - chartVersion: 0.2.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-policies-chart.git + targetRevision: dev/phase1-testing + path: . 
overrides: - name: global.coco.azure.tags value: "key1=value1,key2=value2" @@ -137,6 +156,72 @@ clusterGroup: jsonPointers: - /metadata/labels - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: deletingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: generatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: imagevalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: mutatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespaceddeletingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedgeneratingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedimagevalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedmutatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedvalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: policyexceptions.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io 
+ kind: CustomResourceDefinition + name: validatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations extraValueFiles: - '/overrides/values-kyverno.yaml' @@ -153,16 +238,13 @@ clusterGroup: # imagePullPolicy is set to always: imperative.imagePullPolicy # For additional overrides that apply to the jobs, please refer to # https://validatedpatterns.io/imperative-actions/#additional-job-customizations - image: ghcr.io/butler54/imperative-container:latest jobs: - - name: install-deps - playbook: ansible/install-deps.yaml + - name: get-azure-deps + playbook: ansible/get-azure-deps.yaml verbosity: -vvv timeout: 3600 - name: configure-azure-dns playbook: ansible/configure-issuer.yaml - # this image has not been changes. TBD would make sense - #image: quay.io/hybridcloudpatterns/ansible-edge-gitops-ee:latest verbosity: -vvv timeout: 3600 - name: configure-azure-nat-gateway diff --git a/values-baremetal-gpu.yaml b/values-baremetal-gpu.yaml deleted file mode 100644 index af788dcf..00000000 --- a/values-baremetal-gpu.yaml +++ /dev/null @@ -1,273 +0,0 @@ -# Bare metal deployment for confidential containers WITH NVIDIA GPU support. -# Supports Intel TDX and AMD SEV-SNP via auto-detection (NFD). -# Includes NVIDIA H100 confidential GPU components (GPU Operator, IOMMU, CC Manager). -# Set main.clusterGroupName: baremetal-gpu in values-global.yaml to use. 
- -clusterGroup: - name: baremetal-gpu - isHubCluster: true - namespaces: - - open-cluster-management - - vault - - golang-external-secrets - - openshift-sandboxed-containers-operator - - trustee-operator-system - - cert-manager-operator - - cert-manager - - hello-openshift - - kbs-access - - openshift-cnv - - openshift-storage - - openshift-nfd - - baremetal - - intel-dcap - - nvidia-gpu-operator - - gpu-workload - - kyverno - - subscriptions: - acm: - name: advanced-cluster-management - namespace: open-cluster-management - sandbox: - name: sandboxed-containers-operator - namespace: openshift-sandboxed-containers-operator - source: redhat-operators - channel: stable - installPlanApproval: Manual - csv: sandboxed-containers-operator.v1.12.0 - trustee: - name: trustee-operator - namespace: trustee-operator-system - source: redhat-operators - channel: stable - installPlanApproval: Manual - csv: trustee-operator.v1.1.0 - cert-manager: - name: openshift-cert-manager-operator - namespace: cert-manager-operator - channel: stable-v1 - lvm-operator: - name: lvms-operator - namespace: openshift-storage - source: redhat-operators - channel: stable-4.20 - installPlanApproval: Automatic - cnv: - name: kubevirt-hyperconverged - namespace: openshift-cnv - source: redhat-operators - channel: stable - installPlanApproval: Automatic - nfd: - name: nfd - namespace: openshift-nfd - channel: stable - gpu-operator: - name: gpu-operator-certified - namespace: nvidia-gpu-operator - source: certified-operators - channel: v26.3 - installPlanApproval: Manual - csv: gpu-operator-certified.v26.3.0 - intel-device-plugins: - name: intel-device-plugins-operator - namespace: openshift-operators - source: certified-operators - channel: stable - projects: - - hub - - vault - - trustee - - golang-external-secrets - - sandbox - - workloads - - default - - # Explicitly mention the cluster-state based overrides we plan to use for this pattern. 
- # We can use self-referential variables because the chart calls the tpl function with these variables defined - sharedValueFiles: - - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - - '/overrides/values-storage-{{ $.Values.global.storageProvider }}.yaml' - - applications: - acm: - name: acm - namespace: open-cluster-management - project: hub - chart: acm - chartVersion: 0.1.* - - vault: - name: vault - namespace: vault - project: vault - chart: hashicorp-vault - chartVersion: 0.1.* - - secrets-operator: - name: golang-external-secrets - namespace: golang-external-secrets - project: golang-external-secrets - chart: golang-external-secrets - chartVersion: 0.1.* - - trustee: - name: trustee - namespace: trustee-operator-system - project: trustee - chart: trustee - chartVersion: 0.7.* - extraValueFiles: - - '/overrides/values-trustee.yaml' - overrides: - - name: kbs.tdx.enabled - value: "true" - - name: kbs.tdx.collateralService - value: "https://pccs-service.intel-dcap.svc.cluster.local:8042/sgx/certification/v4/" - - name: kbs.gpu.enabled - value: "true" - - name: kbs.baremetal.enabled - value: "true" - - storage: - name: storage - namespace: openshift-storage - project: hub - path: charts/hub/storage - - baremetal: - name: baremetal - namespace: baremetal - project: hub - path: charts/all/baremetal - - sandbox: - name: sandbox - namespace: openshift-sandboxed-containers-operator - project: sandbox - chart: sandboxed-containers - chartVersion: 0.2.* - overrides: - - name: global.secretStore.backend - value: vault - - name: secretStore.name - value: vault-backend - - name: secretStore.kind - value: ClusterSecretStore - - name: enablePeerPods - value: "false" - - - intel-dcap: - name: intel-dcap - namespace: intel-dcap - project: hub - path: charts/all/intel-dcap - overrides: - - name: secretStore.name - value: vault-backend - - name: secretStore.kind - value: ClusterSecretStore - - nvidia-gpu: - name: nvidia-gpu - namespace: nvidia-gpu-operator - 
project: hub - path: charts/all/nvidia-gpu - - gpu-workload: - name: gpu-workload - namespace: gpu-workload - project: workloads - path: charts/coco-supported/gpu-workload - syncPolicy: - automated: - prune: true - - sandbox-policies: - name: sandbox-policies - namespace: openshift-sandboxed-containers-operator - chart: sandboxed-policies - chartVersion: 0.2.* - - kbs-access: - name: kbs-access - namespace: kbs-access - project: workloads - path: charts/coco-supported/kbs-access - syncPolicy: - automated: - prune: true - overrides: - - name: defaultMemory - value: "8192" - - hello-openshift: - name: hello-openshift - namespace: hello-openshift - project: workloads - path: charts/coco-supported/hello-openshift - syncPolicy: - automated: - prune: true - - kyverno: - name: kyverno - namespace: kyverno - project: hub - repoURL: https://kyverno.github.io/kyverno/ - chart: kyverno - chartVersion: 3.7.* - syncPolicy: - automated: {} - retry: - limit: 20 - syncOptions: - - ServerSideApply=true - - RespectIgnoreDifferences=true - ignoreDifferences: - - group: apiextensions.k8s.io - kind: CustomResourceDefinition - name: policies.kyverno.io - jsonPointers: - - /metadata/labels - - /metadata/annotations - extraValueFiles: - - '/overrides/values-kyverno.yaml' - overrides: - - name: backgroundController.resources.limits.memory - value: "512Mi" - - name: backgroundController.resources.requests.memory - value: "256Mi" - - coco-kyverno-policies: - name: coco-kyverno-policies - namespace: openshift-sandboxed-containers-operator - project: sandbox - path: charts/all/coco-kyverno-policies - - imperative: - # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm - # The default schedule is every 10 minutes: imperative.schedule - # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds - # imagePullPolicy is set to always: imperative.imagePullPolicy - # For additional overrides that apply to the jobs, please refer to - # https://validatedpatterns.io/imperative-actions/#additional-job-customizations - image: ghcr.io/butler54/imperative-container:latest - serviceAccountCreate: true - adminServiceAccountCreate: true - serviceAccountName: imperative-admin-sa - jobs: - - name: install-deps - playbook: ansible/install-deps.yaml - verbosity: -vvv - timeout: 3600 - - name: init-data-gzipper - playbook: ansible/init-data-gzipper.yaml - verbosity: -vvv - timeout: 3600 - - name: reconcile-kataconfig-gpu - playbook: ansible/reconcile-kataconfig-gpu.yaml - verbosity: -vvv - timeout: 600 diff --git a/values-baremetal.yaml b/values-baremetal.yaml index 76711bcb..4628b549 100644 --- a/values-baremetal.yaml +++ b/values-baremetal.yaml @@ -1,5 +1,6 @@ # Bare metal deployment for confidential containers. -# Supports Intel TDX and AMD SEV-SNP via auto-detection (NFD). +# Supports Intel TDX and AMD SEV-SNP with optional NVIDIA GPU. +# Hardware profile (global.hardware.profile) controls which operators are active. # Set main.clusterGroupName: baremetal in values-global.yaml to use. 
clusterGroup: @@ -8,7 +9,10 @@ clusterGroup: namespaces: - open-cluster-management - vault - - golang-external-secrets + - external-secrets-operator: + operatorGroup: true + targetNamespaces: [] + - external-secrets - openshift-sandboxed-containers-operator - trustee-operator-system - cert-manager-operator @@ -20,12 +24,18 @@ clusterGroup: - openshift-nfd - baremetal - intel-dcap + - nvidia-gpu-operator + - gpu-workload - kyverno subscriptions: acm: name: advanced-cluster-management namespace: open-cluster-management + eso: + name: openshift-external-secrets-operator + namespace: external-secrets-operator + channel: stable-v1 sandbox: name: sandboxed-containers-operator namespace: openshift-sandboxed-containers-operator @@ -48,7 +58,6 @@ clusterGroup: name: lvms-operator namespace: openshift-storage source: redhat-operators - channel: stable-4.20 installPlanApproval: Automatic cnv: name: kubevirt-hyperconverged @@ -65,11 +74,18 @@ clusterGroup: namespace: openshift-operators source: certified-operators channel: stable + gpu-operator: + name: gpu-operator-certified + namespace: nvidia-gpu-operator + source: certified-operators + channel: v26.3 + installPlanApproval: Manual + csv: gpu-operator-certified.v26.3.0 projects: - hub - vault - trustee - - golang-external-secrets + - external-secrets - sandbox - workloads - default @@ -79,6 +95,43 @@ clusterGroup: sharedValueFiles: - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - '/overrides/values-storage-{{ $.Values.global.storageProvider }}.yaml' + - '/overrides/values-{{ $.Values.global.clusterVersion }}.yaml' + - '/overrides/values-hw-{{ $.Values.global.hardware.profile }}.yaml' + + extraObjects: + wait-for-vault-unsealed: + apiVersion: batch/v1 + kind: Job + metadata: + name: wait-for-vault-unsealed + annotations: + argocd.argoproj.io/hook: Sync + argocd.argoproj.io/hook-delete-policy: HookSucceeded,BeforeHookCreation + argocd.argoproj.io/sync-wave: "5" + spec: + backoffLimit: 10 + 
activeDeadlineSeconds: 600 + template: + spec: + restartPolicy: OnFailure + containers: + - name: wait-for-vault + image: registry.redhat.io/openshift4/ose-cli:latest + command: + - /bin/bash + - -c + - | + echo "Waiting for vault to be unsealed..." + while true; do + if curl -sk https://vault.vault.svc:8200/v1/sys/seal-status | grep -q '"sealed":false'; then + echo "Vault unsealed. Waiting 120s for secrets to load..." + sleep 120 + echo "Done. Proceeding." + exit 0 + fi + echo "Vault not ready. Retrying in 15s..." + sleep 15 + done applications: acm: @@ -94,27 +147,39 @@ clusterGroup: project: vault chart: hashicorp-vault chartVersion: 0.1.* + annotations: + argocd.argoproj.io/sync-wave: "0" secrets-operator: - name: golang-external-secrets - namespace: golang-external-secrets - project: golang-external-secrets - chart: golang-external-secrets - chartVersion: 0.1.* + name: openshift-external-secrets + namespace: external-secrets + project: external-secrets + chart: openshift-external-secrets + chartVersion: 0.0.* + annotations: + argocd.argoproj.io/sync-wave: "0" trustee: name: trustee namespace: trustee-operator-system project: trustee - chart: trustee - chartVersion: 0.7.* + annotations: + argocd.argoproj.io/sync-wave: "20" + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/trustee-chart.git + targetRevision: dev/phase1-testing + path: . 
extraValueFiles: - '/overrides/values-trustee.yaml' overrides: + - name: global.coco.secured + value: "true" - name: kbs.tdx.enabled value: "true" - name: kbs.tdx.collateralService value: "https://pccs-service.intel-dcap.svc.cluster.local:8042/sgx/certification/v4/" + - name: kbs.gpu.enabled + value: "true" - name: kbs.baremetal.enabled value: "true" @@ -123,19 +188,27 @@ clusterGroup: namespace: openshift-storage project: hub path: charts/hub/storage + annotations: + argocd.argoproj.io/sync-wave: "0" baremetal: name: baremetal namespace: baremetal project: hub path: charts/all/baremetal + annotations: + argocd.argoproj.io/sync-wave: "10" sandbox: name: sandbox namespace: openshift-sandboxed-containers-operator project: sandbox - chart: sandboxed-containers - chartVersion: 0.2.* + annotations: + argocd.argoproj.io/sync-wave: "10" + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-containers-chart.git + targetRevision: dev/phase1-testing + path: . 
overrides: - name: global.secretStore.backend value: vault @@ -152,35 +225,78 @@ clusterGroup: namespace: intel-dcap project: hub path: charts/all/intel-dcap + annotations: + argocd.argoproj.io/sync-wave: "20" + ignoreDifferences: + - group: deviceplugin.intel.com + kind: SgxDevicePlugin + name: sgxdeviceplugin-sample + jsonPointers: + - /spec overrides: - name: secretStore.name value: vault-backend - name: secretStore.kind value: ClusterSecretStore - sandbox-policies: - name: sandbox-policies - namespace: openshift-sandboxed-containers-operator - chart: sandboxed-policies - chartVersion: 0.2.* + nvidia-gpu: + name: nvidia-gpu + namespace: nvidia-gpu-operator + project: hub + path: charts/all/nvidia-gpu + annotations: + argocd.argoproj.io/sync-wave: "10" - kbs-access: - name: kbs-access - namespace: kbs-access + gpu-workload: + name: gpu-workload + namespace: gpu-workload project: workloads - path: charts/coco-supported/kbs-access + path: charts/coco-supported/gpu-workload + annotations: + argocd.argoproj.io/sync-wave: "30" syncPolicy: automated: prune: true - overrides: - - name: defaultMemory - value: "8192" + + sandbox-policies: + name: sandbox-policies + namespace: openshift-sandboxed-containers-operator + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-policies-chart.git + targetRevision: dev/phase1-testing + path: . 
+ annotations: + argocd.argoproj.io/sync-wave: "20" hello-openshift: name: hello-openshift namespace: hello-openshift project: workloads path: charts/coco-supported/hello-openshift + annotations: + argocd.argoproj.io/sync-wave: "30" + syncPolicy: + automated: + prune: true + + kbs-access-curl: + name: kbs-access-curl + namespace: kbs-access + project: workloads + path: charts/coco-supported/kbs-access-curl + annotations: + argocd.argoproj.io/sync-wave: "30" + syncPolicy: + automated: + prune: true + + kbs-access-sealed: + name: kbs-access-sealed + namespace: kbs-access + project: workloads + path: charts/coco-supported/kbs-access-sealed + annotations: + argocd.argoproj.io/sync-wave: "30" syncPolicy: automated: prune: true @@ -189,6 +305,8 @@ clusterGroup: name: kyverno namespace: kyverno project: hub + annotations: + argocd.argoproj.io/sync-wave: "0" repoURL: https://kyverno.github.io/kyverno/ chart: kyverno chartVersion: 3.7.* @@ -206,6 +324,72 @@ clusterGroup: jsonPointers: - /metadata/labels - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: deletingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: generatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: imagevalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: mutatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespaceddeletingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: 
namespacedgeneratingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedimagevalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedmutatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: namespacedvalidatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: policyexceptions.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations + - group: apiextensions.k8s.io + kind: CustomResourceDefinition + name: validatingpolicies.policies.kyverno.io + jsonPointers: + - /metadata/labels + - /metadata/annotations extraValueFiles: - '/overrides/values-kyverno.yaml' overrides: @@ -219,6 +403,8 @@ clusterGroup: namespace: openshift-sandboxed-containers-operator project: sandbox path: charts/all/coco-kyverno-policies + annotations: + argocd.argoproj.io/sync-wave: "20" imperative: # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm @@ -227,16 +413,19 @@ clusterGroup: # imagePullPolicy is set to always: imperative.imagePullPolicy # For additional overrides that apply to the jobs, please refer to # https://validatedpatterns.io/imperative-actions/#additional-job-customizations - image: ghcr.io/butler54/imperative-container:latest - serviceAccountCreate: true - adminServiceAccountCreate: true - serviceAccountName: imperative-admin-sa + clusterRoleYaml: + - apiGroups: + - '*' + resources: + - '*' + verbs: + - '*' jobs: - - name: install-deps - playbook: ansible/install-deps.yaml - verbosity: -vvv - timeout: 3600 - name: init-data-gzipper playbook: ansible/init-data-gzipper.yaml verbosity: -vvv timeout: 3600 + - name: reconcile-kataconfig-gpu + playbook: ansible/reconcile-kataconfig-gpu.yaml + verbosity: -vvv + timeout: 600 diff --git a/values-global.yaml b/values-global.yaml index aa8513d4..ac561aea 100644 --- a/values-global.yaml +++ b/values-global.yaml @@ -1,6 +1,7 @@ global: pattern: coco-pattern storageProvider: hpp # Options: hpp, lvm, external + clusterVersion: "4.21" # OCP minor version for operator pins. Options: 4.19, 4.20, 4.21 secretStore: # Warning: This must be present even if it is set to none. backend: vault # none, vault, kubernetes @@ -12,7 +13,7 @@ global: # This defines whether or not to use upstream resources for CoCo. # Defines whether or not the hub cluster can be used for confidential containers coco: - securityPolicyFlavour: "insecure" # insecure, signed or reject is expected. + securityPolicyFlavour: "insecure" # insecure, redhat-secure-gpg, redhat-secure-sigstore, signed or reject secured: true # true or false. If true, the cluster will be secured. If false, the cluster will be insecure. bypassAttestation: false # Enable SSH key injection into podvm for debugging. Do not enable in production. 
@@ -22,17 +23,25 @@ global: azure: defaultVMFlavour: "Standard_DC2as_v5" VMFlavours: "Standard_DC2as_v5,Standard_DC4as_v5,Standard_DC8as_v5,Standard_DC16as_v5" + hardware: + profile: intel-tdx # Options: intel-tdx, amd-snp, intel-tdx-gpu, amd-snp-gpu main: - # WARNING - # This default configuration uses a single cluster on azure. - # It fundamentally violates the separation of duties. - clusterGroupName: simple + # Set clusterGroupName to match your deployment topology. The committed default must remain 'azure' (enforced by the validate-defaults workflow): + # azure — Azure single-cluster (all components in one cluster) + # azure-spoke — Azure spoke (multi-cluster, imported into ACM from trusted-hub) + # baremetal — Bare metal (Intel TDX / AMD SEV-SNP, hardware profile gated) + # trusted-hub — Hub for multi-cluster (Trustee + ACM, manages spoke clusters) + clusterGroupName: azure + singleArgoCD: true multiSourceConfig: enabled: true clusterGroupChartVersion: 0.9.* -# Common secret store configuration used across multiple charts -# Warning do not rely on this. it does not consistently apply. +# Common secret store configuration used across multiple charts. +# NOTE: Charts reference secretStore.* (top-level, not global.secretStore.*). +# Top-level values do not propagate from values-global.yaml — each application +# that needs secretStore must declare it via inline overrides in the topology file. +# This is intentional — it allows different charts to use different secret stores. secretStore: name: vault-backend kind: ClusterSecretStore diff --git a/values-secret.yaml.template b/values-secret.yaml.template index 75723580..1afb92d0 100644 --- a/values-secret.yaml.template +++ b/values-secret.yaml.template @@ -111,8 +111,73 @@ secrets: } } } - - # Cosign public keys for image signature verification + # Red Hat secure policy (sigstore-based - RECOMMENDED) + # Uses modern sigstore signature verification for Red Hat images. + # Signatures are stored as OCI artifacts in the registry (no HTTP lookaside needed).
+ # The sigstore public key is embedded directly in the policy (base64-encoded). + # VERIFIED WORKING: Tested with podman on RHEL 10.2 (2026-07-03). + - name: redhat-secure-sigstore + value: | + { + "default": [{"type": "insecureAcceptAnything"}], + "transports": { + "docker": { + "registry.redhat.io": [ + { + "type": "sigstoreSigned", + "keyData": "YXJ0aWZhY3RzIHRoYXQgYXJlIHNpZ3N0b3JlLWVuYWJsZWQuCi0tLS0tQkVHSU4gUFVCTElDIEtFWS0tLS0tCk1JSUNJakFOQmdrcWhraUc5dzBCQVFFRkFBT0NBZzhBTUlJQ0NnS0NBZ0VBMEFTeXVIMlRMV3ZCVXFQSFo0SXAKNzVnN0VuY0JrZ1FIZEpuanp4QVc1S1FUTWgvc2lCb0IvQm9TcnRpUE13bkNoYlRDblFPSVFlWnVEaUZuaHVKNwpNL0QzYjdKb1gwbTEyM05jQ1NuNjdtQWRqQmE2Qmc2a3VrWmdDUDRaVVplRVNhaldYL0VqeWxGY1JGT1hXNTdwClJEQ0VONDJKL2pZbFZxdCtnOStHcmtlcjhTejg2SDNsMHRicU9kamJ6L1Z4SFlod0YwY3RVTUhzeVZSRHEyUVAKdHF6TlhsbWxNaFMvUG9GcjZSNHUvN0hDbi9LK0xlZ2NPMmZBRk9iNDBLdktTS0tWRDZsZXdVWkVyaG9wMUNnSgpYakR0R21tTzlkR01GNzFtZjZIRWZhS1NkeStFRTZpU0YyQTJWdjlRaEJhd01pcTJrT3pFaUxnNG5BZEpUOHdnClpyTUFtUENxR0lzWE5HWjQvUStZVHd3bGNlM2dscWI1TDl0Zk5vekVkU1I5Tjg1REVTZlFMUUVkWTNDYWx3S00KQlQxT0VoRVgxd0hSQ1U0ZHJNT2VqNkJOVzBWdHNjR3RIbUNyczc0alBlemh3TlQ4eXBreVMrVDB6VDRUc3k2ZgpWWGtKOFlTSHllblN6TUIyT3AyYnZzRTNnclkrczc0V2hHOVVJQTZEQnhjVGllMTVOU3pLd2Z6YW9OV09EY0xGCnA3Qlk4YWFIRTJNcUZ4WUZYK0lianBrUVJmYWVRUXNvdURGZENrWEVGVmZQcGJEMmRrNkZsZWFNVFB1eXh0SVQKZ2pWRXRHUUsycUdDRkdpUUhGZDRoZlYrZUNBNjNKcm8xejB6b0JNNUJiSUlRMytlVkZ3dDNBbFpwNVVWd3I2ZApzZWNxa2kveXJtdjNZMGRxWjlWT24zVUNBd0VBQVE9PQotLS0tLUVORCBQVUJMSUMgS0VZLS0tLS0K", + "signedIdentity": {"type": "matchRepository"} + } + ], + "registry.access.redhat.com": [ + { + "type": "sigstoreSigned", + "keyData": 
"YXJ0aWZhY3RzIHRoYXQgYXJlIHNpZ3N0b3JlLWVuYWJsZWQuCi0tLS0tQkVHSU4gUFVCTElDIEtFWS0tLS0tCk1JSUNJakFOQmdrcWhraUc5dzBCQVFFRkFBT0NBZzhBTUlJQ0NnS0NBZ0VBMEFTeXVIMlRMV3ZCVXFQSFo0SXAKNzVnN0VuY0JrZ1FIZEpuanp4QVc1S1FUTWgvc2lCb0IvQm9TcnRpUE13bkNoYlRDblFPSVFlWnVEaUZuaHVKNwpNL0QzYjdKb1gwbTEyM05jQ1NuNjdtQWRqQmE2Qmc2a3VrWmdDUDRaVVplRVNhaldYL0VqeWxGY1JGT1hXNTdwClJEQ0VONDJKL2pZbFZxdCtnOStHcmtlcjhTejg2SDNsMHRicU9kamJ6L1Z4SFlod0YwY3RVTUhzeVZSRHEyUVAKdHF6TlhsbWxNaFMvUG9GcjZSNHUvN0hDbi9LK0xlZ2NPMmZBRk9iNDBLdktTS0tWRDZsZXdVWkVyaG9wMUNnSgpYakR0R21tTzlkR01GNzFtZjZIRWZhS1NkeStFRTZpU0YyQTJWdjlRaEJhd01pcTJrT3pFaUxnNG5BZEpUOHdnClpyTUFtUENxR0lzWE5HWjQvUStZVHd3bGNlM2dscWI1TDl0Zk5vekVkU1I5Tjg1REVTZlFMUUVkWTNDYWx3S00KQlQxT0VoRVgxd0hSQ1U0ZHJNT2VqNkJOVzBWdHNjR3RIbUNyczc0alBlemh3TlQ4eXBreVMrVDB6VDRUc3k2ZgpWWGtKOFlTSHllblN6TUIyT3AyYnZzRTNnclkrczc0V2hHOVVJQTZEQnhjVGllMTVOU3pLd2Z6YW9OV09EY0xGCnA3Qlk4YWFIRTJNcUZ4WUZYK0lianBrUVJmYWVRUXNvdURGZENrWEVGVmZQcGJEMmRrNkZsZWFNVFB1eXh0SVQKZ2pWRXRHUUsycUdDRkdpUUhGZDRoZlYrZUNBNjNKcm8xejB6b0JNNUJiSUlRMytlVkZ3dDNBbFpwNVVWd3I2ZApzZWNxa2kveXJtdjNZMGRxWjlWT24zVUNBd0VBQVE9PQotLS0tLUVORCBQVUJMSUMgS0VZLS0tLS0K", + "signedIdentity": {"type": "matchRepository"} + } + ] + } + } + } + # Red Hat secure policy (GPG-based - LEGACY, BLOCKED) + # Uses GPG signature verification for Red Hat images via KBS. + # BLOCKED: Requires HTTP lookaside support in image-rs (issue #9). + # Signatures are at https://registry.redhat.io/containers/sigstore but image-rs + # cannot fetch from HTTPS yet. Kept as reference for when HTTP support lands. 
+ - name: redhat-secure-gpg + value: | + { + "default": [{"type": "insecureAcceptAnything"}], + "transports": { + "docker": { + "registry.redhat.io": [ + { + "type": "signedBy", + "keyType": "GPGKeys", + "keyPath": "kbs:///default/redhat-gpg-key/redhat-release" + } + ], + "registry.access.redhat.com": [ + { + "type": "signedBy", + "keyType": "GPGKeys", + "keyPath": "kbs:///default/redhat-gpg-key/redhat-release" + } + ] + } + } + } + + # Sigstore public keys for Red Hat container signature verification + # Red Hat release key 3 (used for sigstore signatures as of Oct 2024) + - name: sigstore-keys + vaultPrefixes: + - hub + fields: + - name: redhat-release3 + path: ~/.coco-pattern/SIGSTORE-redhat-release3 + + # Cosign public keys for custom image signature verification # Required when using the "signed" policy above. # Add your cosign public key files here. # Generate a cosign key pair: cosign generate-key-pair diff --git a/values-trusted-hub.yaml b/values-trusted-hub.yaml index 719e2ca0..72fa85bb 100644 --- a/values-trusted-hub.yaml +++ b/values-trusted-hub.yaml @@ -1,4 +1,7 @@ -# This is currently configured as an 'all in one' deployment in one cluster. +# Multi-cluster hub topology for confidential containers. +# Deploys Trustee (KBS + attestation), HashiCorp Vault, ACM, and cert-manager. +# Manages spoke clusters imported via ACM with the azure-spoke clusterGroup. +# Set main.clusterGroupName: trusted-hub in values-global.yaml to use. 
clusterGroup: name: trusted-hub @@ -6,16 +9,23 @@ clusterGroup: namespaces: - open-cluster-management - vault - - golang-external-secrets + - external-secrets-operator: + operatorGroup: true + targetNamespaces: [] + - external-secrets - trustee-operator-system - cert-manager-operator - openshift-sandboxed-containers-operator - cert-manager subscriptions: - # ACM is kept anticipating + # ACM is kept for multi-cluster use: spoke clusters are imported and managed through it acm: name: advanced-cluster-management namespace: open-cluster-management + eso: + name: openshift-external-secrets-operator + namespace: external-secrets-operator + channel: stable-v1 trustee: name: trustee-operator namespace: trustee-operator-system @@ -32,7 +42,7 @@ clusterGroup: - hub - vault - trustee - - golang-external-secrets + - external-secrets - sandbox - workloads - default @@ -40,6 +50,7 @@ clusterGroup: # We can use self-referential variables because the chart calls the tpl function with these variables defined sharedValueFiles: - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' + - '/overrides/values-{{ $.Values.global.clusterVersion }}.yaml' applications: acm: name: acm @@ -56,25 +67,32 @@ clusterGroup: chartVersion: 0.1.* secrets-operator: - name: golang-external-secrets - namespace: golang-external-secrets - project: golang-external-secrets - chart: golang-external-secrets - chartVersion: 0.1.* + name: openshift-external-secrets + namespace: external-secrets + project: external-secrets + chart: openshift-external-secrets + chartVersion: 0.0.* trustee: name: trustee namespace: trustee-operator-system #upstream config project: trustee - chart: trustee - chartVersion: 0.7.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/trustee-chart.git + targetRevision: dev/phase1-testing + path: .
extraValueFiles: - '/overrides/values-trustee.yaml' + overrides: + - name: global.coco.secured + value: "true" sandbox-policies: name: sandbox-policies namespace: openshift-sandboxed-containers-operator #upstream config - chart: sandboxed-policies - chartVersion: 0.2.* + # DEV: git-based chart reference for testing. Revert to chart/chartVersion before upstream PR. + repoURL: https://github.com/butler54/sandboxed-policies-chart.git + targetRevision: dev/phase1-testing + path: . overrides: - name: global.coco.azure.tags value: "key1=value1,key2=value2" @@ -84,10 +102,9 @@ clusterGroup: imperative: - image: ghcr.io/butler54/imperative-container:latest jobs: - - name: install-deps - playbook: ansible/install-deps.yaml + - name: get-azure-deps + playbook: ansible/get-azure-deps.yaml verbosity: -vvv timeout: 3600 - name: configure-azure-dns @@ -99,11 +116,11 @@ clusterGroup: verbosity: -vvv timeout: 3600 managedClusterGroups: - spoke: - name: spoke + azure-spoke: + name: azure-spoke acmlabels: - name: clusterGroup - value: spoke + value: azure-spoke helmOverrides: - name: clusterGroup.isHubCluster value: false