Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/vclusterctl/cmd/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ func NewRestore(globalFlags *flags.GlobalFlags) *cobra.Command {
Restore a virtual cluster.

Example:
# Restore from a local file on this machine
vcluster restore my-vcluster file:///home/user/my-snapshot.tar.gz
# Restore from oci image
vcluster restore my-vcluster oci://ghcr.io/my-user/my-repo:my-tag
# Restore from s3 bucket
Expand Down
2 changes: 2 additions & 0 deletions cmd/vclusterctl/cmd/snapshot/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ request, which will be processed asynchronously by a vCluster
controller.

Example:
# Snapshot to a local file on this machine
vcluster snapshot create my-vcluster file:///home/user/my-snapshot.tar.gz
# Snapshot to oci image
vcluster snapshot create my-vcluster oci://ghcr.io/my-user/my-repo:my-tag
# Snapshot to s3 bucket
Expand Down
65 changes: 65 additions & 0 deletions e2e-next/test_storage/snapshot/test_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func SnapshotAllSpec() {
describeSnapshotRestore(&s)
describeSnapshotCanceling(&s)
describeSnapshotDeletion(&s)
describeFileProtocol(&s)
},
)
}
Expand Down Expand Up @@ -695,6 +696,70 @@ func describeSnapshotDeletion(s *snapshotCtx) {
)
}

// describeFileProtocol registers an ordered spec group covering the file://
// snapshot protocol end to end: it snapshots the vCluster to a local archive,
// checks that the archive exists on disk, deletes a ConfigMap that was deployed
// before the snapshot, restores from the archive and finally verifies that the
// ConfigMap is back with its original data.
//
// The specs are Ordered because the restore spec consumes the archive produced
// by the snapshot spec.
func describeFileProtocol(s *snapshotCtx) {
	Describe("file protocol snapshot and restore", Ordered, func() {
		var (
			namespace   string
			snapshotURL string
			wantCM      *corev1.ConfigMap
		)

		// archivePath converts the file:// URL handed to the CLI into the plain
		// filesystem path the archive is expected at.
		archivePath := func() string {
			return strings.TrimPrefix(snapshotURL, "file://")
		}

		BeforeAll(func(ctx context.Context) {
			namespace = "file-snapshot-" + random.String(6)
			snapshotURL = "file:///tmp/vcluster-file-snapshot-" + namespace + ".tar.gz"
			cleanupAllSnapshotArtifacts(ctx, s.hostClient, s.vClusterNS)
			// Only the ConfigMap is needed later; the remaining deployed
			// resources are not inspected by these specs.
			wantCM, _, _, _, _, _ = s.deployTestResources(ctx, namespace)
		})

		It("Creates the snapshot and writes to local filesystem", func(ctx context.Context) {
			By("Creating the snapshot via file protocol", func() {
				createSnapshot(s.vClusterName, s.vClusterNS, s.kubeconfig, true, snapshotURL, false)
				// The CLI blocks until the snapshot completes and the file is downloaded locally.
				waitForSnapshotToBeCreated(ctx, s.hostClient, s.vClusterNS)
			})

			By("Verifying the snapshot file exists on local filesystem", func() {
				onDisk := archivePath()
				_, statErr := os.Stat(onDisk)
				Expect(statErr).To(Succeed(), "snapshot file should exist at %s", onDisk)
			})
		})

		It("Restores from local file and verifies resources", func(ctx context.Context) {
			By("Deleting pre-snapshot resources that should be recreated by restore", func() {
				configMaps := s.vClusterClient.CoreV1().ConfigMaps(namespace)
				Expect(configMaps.Delete(ctx, wantCM.Name, metav1.DeleteOptions{})).To(Succeed())
			})

			By("Restoring the tenant cluster from local snapshot file", func() {
				restoreVCluster(ctx, s.hostClient, s.vClusterName, s.vClusterNS, snapshotURL, s.kubeconfig, true, false)
				s.refreshClient(ctx)
			})

			By("Verifying pre-snapshot resources are restored", func() {
				selector := metav1.ListOptions{LabelSelector: "snapshot=restore"}
				Eventually(func(g Gomega) {
					restored, listErr := s.vClusterClient.CoreV1().ConfigMaps(namespace).List(ctx, selector)
					g.Expect(listErr).To(Succeed())
					g.Expect(restored.Items).To(HaveLen(1),
						"expected configmap %s to be recreated by restore", wantCM.Name)
					g.Expect(restored.Items[0].Data).To(Equal(wantCM.Data))
				}).WithPolling(constants.PollingInterval).WithTimeout(constants.PollingTimeout).Should(Succeed())
			})
		})

		AfterAll(func(ctx context.Context) {
			// Best-effort removal: the archive may be missing if the snapshot spec failed.
			_ = os.Remove(archivePath())
			deleteSnapshotRequestConfigMaps(ctx, s.hostClient, s.vClusterNS)
		})
	})
}

// --- Volume helpers ---

func createAppWithPVC(ctx context.Context, client kubernetes.Interface, namespace, name string) {
Expand Down
18 changes: 14 additions & 4 deletions pkg/cli/restore_helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func Restore(ctx context.Context, args []string, globalFlags *flags.GlobalFlags,
return restoreVCluster(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, newVCluster, restoreVolumes, log)
}

func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOptions *pod.Options, newVCluster bool, restoreVolumes bool, log log.Logger) error {
func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOpts *pod.Options, newVCluster bool, restoreVolumes bool, log log.Logger) error {
cmdArgs := []string{"restore"}
if newVCluster {
cmdArgs = append(cmdArgs, "--new-vcluster")
Expand All @@ -48,10 +48,20 @@ func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, rest
cmdArgs = append(cmdArgs, "--restore-volumes")
}

if snapshotOpts.Type == "file" {
return restoreFromLocalFile(ctx, vCluster, kubeClient, restConfig, snapshotOpts, podOpts, log, cmdArgs)
}

if vCluster.IsStandalone {
return restoreStandaloneVCluster(ctx, vCluster, snapshotOpts, cmdArgs, log)
return restoreStandaloneVCluster(snapshotOpts, cmdArgs, log)
}

return runRestorePod(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, log, cmdArgs)
}

// runRestorePod runs the restore pod with the given options. It pauses the vCluster before starting the restore and resumes it afterwards.
// The restore pod will perform the restore and resume the vCluster when it's done.
func runRestorePod(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOpts *pod.Options, log log.Logger, cmdArgs []string) error {
// pause vCluster
log.Infof("Pausing vCluster %s", vCluster.Name)
err := pauseVCluster(ctx, kubeClient, vCluster, log)
Expand All @@ -70,15 +80,15 @@ func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, rest

// set missing pod options and run snapshot restore pod
command := append([]string{"/vcluster"}, cmdArgs...)
return pod.RunSnapshotPod(ctx, restConfig, kubeClient, command, vCluster, podOptions, snapshotOpts, log)
return pod.RunSnapshotPod(ctx, restConfig, kubeClient, command, vCluster, podOpts, snapshotOpts, log)
}

// restoreStandaloneVCluster stops the standalone service, invokes the vcluster binary
// directly to perform the restore, and always attempts to start the service again
// before returning. If both the restore and restart fail, the returned error retains
// both failures. The CLI must run on the same host as the standalone installation
// because it needs filesystem access to the binary and config.
func restoreStandaloneVCluster(ctx context.Context, vCluster *find.VCluster, snapshotOpts *snapshot.Options, cmdArgs []string, log log.Logger) (retErr error) {
func restoreStandaloneVCluster(snapshotOpts *snapshot.Options, cmdArgs []string, log log.Logger) (retErr error) {
vClusterConfig, err := vclusterconfig.LoadStandaloneConfig("", nil)
if err != nil {
return fmt.Errorf("load standalone config: %w", err)
Expand Down
9 changes: 7 additions & 2 deletions pkg/cli/snapshot_helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func CreateSnapshot(ctx context.Context, args []string, globalFlags *flags.Globa
}

// create the snapshot request which will be reconciled by the vCluster controller
err = createSnapshotRequest(ctx, vCluster, kubeClient, snapshotOpts, log)
err = createSnapshotRequest(ctx, vCluster, kubeClient, snapshotOpts, log, restConfig)
if err != nil {
return err
}
Expand Down Expand Up @@ -162,7 +162,7 @@ func initSnapshotCommand(
return vCluster, kubeClient, restClient, nil
}

func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeClient *kubernetes.Clientset, snapshotOpts *snapshot.Options, log log.Logger) error {
func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeClient *kubernetes.Clientset, snapshotOpts *snapshot.Options, log log.Logger, restConfig *rest.Config) error {
err := checkIfVClusterSupportsSnapshotRequests(vCluster, log)
if err != nil {
return fmt.Errorf("vCluster version check failed: %w", err)
Expand All @@ -171,6 +171,11 @@ func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeCli
if err != nil {
return fmt.Errorf("failed to get vcluster config: %w", err)
}

if snapshotOpts.Type == "file" {
return snapshotToLocalFile(ctx, vCluster, kubeClient, restConfig, snapshotOpts, log, vClusterConfig)
}

// Create snapshot request resources
_, err = snapshot.CreateSnapshotRequestResources(ctx, vCluster.Namespace, vClusterConfig.Name, vClusterConfig, snapshotOpts, kubeClient)
if err != nil {
Expand Down
165 changes: 165 additions & 0 deletions pkg/cli/snapshot_local.go
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any potential attack vector that you could think of for snapshot restores with vcluster CLI and path traversal?
Just thinking if we could improve file handling a bit with https://go.dev/blog/osroot - just in case it would be of interest in this code path.

Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
package cli

import (
"context"
"fmt"
"os"
"time"

"github.com/loft-sh/log"
vclusterconfig "github.com/loft-sh/vcluster/pkg/config"
"github.com/loft-sh/vcluster/pkg/snapshot"
"github.com/loft-sh/vcluster/pkg/snapshot/pod"
"github.com/loft-sh/vcluster/pkg/util/podhelper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/loft-sh/vcluster/pkg/cli/find"
)

// dataMountPath is where the vCluster data PVC is mounted in the syncer container.
// Archives for file:// snapshots and restores are staged under this directory
// inside the pod before being downloaded or consumed.
// NOTE(review): a reviewer pointed out that the data volume may be an emptyDir
// rather than a PVC when persistence is disabled — confirm against the chart.
const dataMountPath = "/data"

func snapshotToLocalFile(ctx context.Context, vCluster *find.VCluster,
kubeClient *kubernetes.Clientset, restConfig *rest.Config,
snapshotOpts *snapshot.Options, log log.Logger, vClusterConfig *vclusterconfig.VirtualClusterConfig) error {
tempPath := fmt.Sprintf("%s/vcluster-snapshot-%d.tar.gz", dataMountPath, time.Now().Unix())
localPath := snapshotOpts.File.Path
if !vCluster.IsStandalone {
// For non-standalone, we need to write the snapshot to the syncer PVC first, then download it via exec.
snapshotOpts.File.Path = tempPath
}

log.Infof("Creating snapshot request...")
snapshotRequest, err := snapshot.CreateSnapshotRequestResources(
ctx, vCluster.Namespace, vClusterConfig.Name, vClusterConfig, snapshotOpts, kubeClient)
if err != nil {
return fmt.Errorf("create snapshot request: %w", err)
}

log.Infof("Waiting for snapshot to complete...")
if err := waitForSnapshotRequest(ctx, kubeClient, vCluster.Namespace, snapshotRequest.Name); err != nil {
return err
}

if vCluster.IsStandalone {
// The file backend writes with 0600 already; chmod is a no-op but kept for safety.
_ = os.Chmod(localPath, 0600)
log.Infof("Snapshot saved to %s", localPath)
return nil
Comment thread
jjaferson marked this conversation as resolved.
}

targetPod, err := findVClusterPod(vCluster)
if err != nil {
return err
}

f, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
if err != nil {
return fmt.Errorf("create local file %s: %w", localPath, err)
}
defer f.Close()

log.Infof("Downloading snapshot from pod %s to %s...", targetPod.Name, localPath)
if err := podhelper.ExecStream(ctx, restConfig, &podhelper.ExecStreamOptions{
Pod: targetPod.Name,
Namespace: vCluster.Namespace,
Container: "syncer",
Command: []string{"cat", tempPath},
Stdout: f,
Stderr: os.Stderr,
}); err != nil {
_ = os.Remove(localPath)
return fmt.Errorf("download snapshot from pod: %w", err)
}

if _, _, err := podhelper.ExecBuffered(ctx, restConfig, vCluster.Namespace,
targetPod.Name, "syncer", []string{"rm", "-f", tempPath}, nil); err != nil {
log.Warnf("Failed to remove temp snapshot file %s from pod: %v", tempPath, err)
}

log.Infof("Snapshot saved to %s", localPath)
return nil
}

func restoreFromLocalFile(ctx context.Context, vCluster *find.VCluster,
kubeClient *kubernetes.Clientset, restConfig *rest.Config,
snapshotOpts *snapshot.Options, podOpts *pod.Options,
log log.Logger, cmdArgs []string) error {
tempPath := fmt.Sprintf("%s/vcluster-restore-%d.tar.gz", dataMountPath, time.Now().Unix())
localPath := snapshotOpts.File.Path
if _, err := os.Stat(localPath); os.IsNotExist(err) {
return fmt.Errorf("snapshot file not found: %s", localPath)
}

if vCluster.IsStandalone {
// For standalone, we can read directly from the local file instead of going through the syncer PVC.
return restoreStandaloneVCluster(snapshotOpts, cmdArgs, log)
}

// For non-standalone, we need to upload the local file into the syncer container first, then point the restore command at it.
snapshotOpts.File.Path = tempPath

// Stream the local file into the syncer PVC via exec before pausing.
// The PVC (and the staged file) persist through scale-to-zero.
Comment on lines +106 to +107
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve local restore uploads across pause

When restoring a file:// snapshot for a non-standalone vCluster whose /data volume is not a PVC, this upload is lost before the restore pod starts: runRestorePod pauses/scales the vCluster after the file is written, and the chart can render the control plane as a Deployment without persistence (chart/templates/_persistence.tpl lines 4-5) where data is an emptyDir (chart/templates/statefulset.yaml lines 119-121). In that environment the subsequent restore pod gets a fresh /data, so file:///data/vcluster-restore-...tar.gz does not exist and local-file restore fails; stage the file somewhere that survives the pause or stream it into the restore pod instead.

Useful? React with 👍 / 👎.

targetPod, err := findVClusterPod(vCluster)
if err != nil {
return err
}

f, err := os.Open(localPath)
if err != nil {
return fmt.Errorf("open local snapshot %s: %w", localPath, err)
}
defer f.Close()

log.Infof("Uploading %s to pod %s at %s...", localPath, targetPod.Name, tempPath)
if err := podhelper.ExecStream(ctx, restConfig, &podhelper.ExecStreamOptions{
Pod: targetPod.Name,
Namespace: vCluster.Namespace,
Container: "syncer",
Command: []string{"/bin/sh", "-c", "cat > " + tempPath},
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

potential shell injection, could do

Suggested change
Command: []string{"/bin/sh", "-c", "cat > " + tempPath},
Command: []string{"dd", fmt.Sprintf("of=%s", tempPath)},

or []string{"sh", "-c", "cat > \"$1\"", "--", tempPath}

Stdin: f,
Stdout: os.Stdout,
Stderr: os.Stderr,
}); err != nil {
return fmt.Errorf("upload snapshot to pod: %w", err)
}

return runRestorePod(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, log, cmdArgs)
}

// waitForSnapshotRequest blocks until the snapshot request stored in the
// ConfigMap namespace/name has finished.
//
// The ConfigMap is fetched immediately and then every 5 seconds for up to 30
// minutes. It returns nil once the request is done with phase
// snapshot.RequestPhaseCompleted. It returns an error if the ConfigMap cannot
// be fetched or decoded, if the request is done in any other phase, or if
// polling ends (timeout or ctx cancellation) before the request is done.
func waitForSnapshotRequest(ctx context.Context, kubeClient *kubernetes.Clientset,
	namespace, name string) error {
	const (
		pollInterval = 5 * time.Second
		pollTimeout  = 30 * time.Minute
	)

	// requestFinished reports whether the snapshot request has completed
	// successfully; a returned error aborts the polling loop.
	requestFinished := func(ctx context.Context) (bool, error) {
		requestCM, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			return false, fmt.Errorf("get snapshot request ConfigMap: %w", err)
		}

		request, err := snapshot.UnmarshalSnapshotRequest(requestCM)
		if err != nil {
			return false, fmt.Errorf("unmarshal snapshot request: %w", err)
		}

		switch {
		case !request.Done():
			// Still in progress; keep polling.
			return false, nil
		case request.Status.Phase == snapshot.RequestPhaseCompleted:
			return true, nil
		default:
			// Done, but not completed: surface the phase and recorded error.
			return false, fmt.Errorf("snapshot %s: %s", request.Status.Phase, request.Status.Error.Message)
		}
	}

	return wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, requestFinished)
}

func findVClusterPod(vCluster *find.VCluster) (*corev1.Pod, error) {
for i := range vCluster.Pods {
p := &vCluster.Pods[i]
Comment on lines +158 to +159
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For readability and index-less loops.

Suggested change
for i := range vCluster.Pods {
p := &vCluster.Pods[i]
for _, pod := range vCluster.Pods {
p := &pod

if (vCluster.StatefulSet != nil || vCluster.Deployment != nil) && len(p.Name) > 0 {
return p, nil
}
}
return nil, fmt.Errorf("no running pod found for vCluster %s", vCluster.Name)
}
Loading
Loading