diff --git a/cmd/vclusterctl/cmd/restore.go b/cmd/vclusterctl/cmd/restore.go index 1d123f4074..3ffa84d3eb 100644 --- a/cmd/vclusterctl/cmd/restore.go +++ b/cmd/vclusterctl/cmd/restore.go @@ -46,6 +46,8 @@ func NewRestore(globalFlags *flags.GlobalFlags) *cobra.Command { Restore a virtual cluster. Example: +# Restore from a local file on this machine +vcluster restore my-vcluster file:///home/user/my-snapshot.tar.gz # Restore from oci image vcluster restore my-vcluster oci://ghcr.io/my-user/my-repo:my-tag # Restore from s3 bucket diff --git a/cmd/vclusterctl/cmd/snapshot/create.go b/cmd/vclusterctl/cmd/snapshot/create.go index 195f0a85ca..709f2f8ad8 100644 --- a/cmd/vclusterctl/cmd/snapshot/create.go +++ b/cmd/vclusterctl/cmd/snapshot/create.go @@ -41,6 +41,8 @@ request, which will be processed asynchronously by a vCluster controller. Example: +# Snapshot to a local file on this machine +vcluster snapshot create my-vcluster file:///home/user/my-snapshot.tar.gz # Snapshot to oci image vcluster snapshot create my-vcluster oci://ghcr.io/my-user/my-repo:my-tag # Snapshot to s3 bucket diff --git a/e2e-next/test_storage/snapshot/test_snapshot.go b/e2e-next/test_storage/snapshot/test_snapshot.go index a2f86b3432..0b9c81d482 100644 --- a/e2e-next/test_storage/snapshot/test_snapshot.go +++ b/e2e-next/test_storage/snapshot/test_snapshot.go @@ -200,6 +200,7 @@ func SnapshotAllSpec() { describeSnapshotRestore(&s) describeSnapshotCanceling(&s) describeSnapshotDeletion(&s) + describeFileProtocol(&s) }, ) } @@ -695,6 +696,70 @@ func describeSnapshotDeletion(s *snapshotCtx) { ) } +func describeFileProtocol(s *snapshotCtx) { + // Ordered: create snapshot via file:// -> verify file exists locally -> restore -> verify resources. + // Each spec depends on the snapshot created in spec 1. + Describe("file protocol snapshot and restore", Ordered, func() { + var ( + testNS string + snapshotPath string + configMapToRestore *corev1.ConfigMap + ) + + BeforeAll(func(ctx context.Context) { + testNS = "file-snapshot-" + random.String(6) + snapshotPath = "file:///tmp/vcluster-file-snapshot-" + testNS + ".tar.gz" + cleanupAllSnapshotArtifacts(ctx, s.hostClient, s.vClusterNS) + cmr, _, _, _, _, _ := s.deployTestResources(ctx, testNS) + configMapToRestore = cmr + }) + + It("Creates the snapshot and writes to local filesystem", func(ctx context.Context) { + By("Creating the snapshot via file protocol", func() { + createSnapshot(s.vClusterName, s.vClusterNS, s.kubeconfig, true, snapshotPath, false) + // The CLI blocks until the snapshot completes and the file is downloaded locally. + waitForSnapshotToBeCreated(ctx, s.hostClient, s.vClusterNS) + }) + + By("Verifying the snapshot file exists on local filesystem", func() { + localPath := strings.TrimPrefix(snapshotPath, "file://") + _, err := os.Stat(localPath) + Expect(err).To(Succeed(), "snapshot file should exist at %s", localPath) + }) + }) + + It("Restores from local file and verifies resources", func(ctx context.Context) { + By("Deleting pre-snapshot resources that should be recreated by restore", func() { + err := s.vClusterClient.CoreV1().ConfigMaps(testNS).Delete(ctx, configMapToRestore.Name, metav1.DeleteOptions{}) + Expect(err).To(Succeed()) + }) + + By("Restoring the tenant cluster from local snapshot file", func() { + restoreVCluster(ctx, s.hostClient, s.vClusterName, s.vClusterNS, snapshotPath, s.kubeconfig, true, false) + s.refreshClient(ctx) + }) + + By("Verifying pre-snapshot resources are restored", func() { + Eventually(func(g Gomega) { + cms, err := s.vClusterClient.CoreV1().ConfigMaps(testNS).List(ctx, metav1.ListOptions{ + LabelSelector: "snapshot=restore", + }) + g.Expect(err).To(Succeed()) + g.Expect(cms.Items).To(HaveLen(1), + "expected configmap %s to be recreated by restore", configMapToRestore.Name) + g.Expect(cms.Items[0].Data).To(Equal(configMapToRestore.Data)) + }).WithPolling(constants.PollingInterval).WithTimeout(constants.PollingTimeout).Should(Succeed()) + }) + }) + + AfterAll(func(ctx context.Context) { + localPath := strings.TrimPrefix(snapshotPath, "file://") + _ = os.Remove(localPath) + deleteSnapshotRequestConfigMaps(ctx, s.hostClient, s.vClusterNS) + }) + }) +} + // --- Volume helpers --- func createAppWithPVC(ctx context.Context, client kubernetes.Interface, namespace, name string) { diff --git a/pkg/cli/restore_helm.go b/pkg/cli/restore_helm.go index 149347852a..2c30956ed5 100644 --- a/pkg/cli/restore_helm.go +++ b/pkg/cli/restore_helm.go @@ -39,7 +39,7 @@ func Restore(ctx context.Context, args []string, globalFlags *flags.GlobalFlags, return restoreVCluster(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, newVCluster, restoreVolumes, log) } -func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOptions *pod.Options, newVCluster bool, restoreVolumes bool, log log.Logger) error { +func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOpts *pod.Options, newVCluster bool, restoreVolumes bool, log log.Logger) error { cmdArgs := []string{"restore"} if newVCluster { cmdArgs = append(cmdArgs, "--new-vcluster") @@ -48,10 +48,20 @@ func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, rest cmdArgs = append(cmdArgs, "--restore-volumes") } + if snapshotOpts.Type == "file" { + return restoreFromLocalFile(ctx, vCluster, kubeClient, restConfig, snapshotOpts, podOpts, log, cmdArgs) + } + if vCluster.IsStandalone { - return restoreStandaloneVCluster(ctx, vCluster, snapshotOpts, cmdArgs, log) + return restoreStandaloneVCluster(snapshotOpts, cmdArgs, log) } + return runRestorePod(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, log, cmdArgs) +} + +// runRestorePod runs the restore pod with the given options. It pauses the vCluster before starting the restore and resumes it afterwards. +// The restore pod will perform the restore and resume the vCluster when it's done. +func runRestorePod(ctx context.Context, kubeClient *kubernetes.Clientset, restConfig *rest.Config, vCluster *find.VCluster, snapshotOpts *snapshot.Options, podOpts *pod.Options, log log.Logger, cmdArgs []string) error { // pause vCluster log.Infof("Pausing vCluster %s", vCluster.Name) err := pauseVCluster(ctx, kubeClient, vCluster, log) @@ -70,7 +80,7 @@ func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, rest // set missing pod options and run snapshot restore pod command := append([]string{"/vcluster"}, cmdArgs...) - return pod.RunSnapshotPod(ctx, restConfig, kubeClient, command, vCluster, podOptions, snapshotOpts, log) + return pod.RunSnapshotPod(ctx, restConfig, kubeClient, command, vCluster, podOpts, snapshotOpts, log) } // restoreStandaloneVCluster stops the standalone service, invokes the vcluster binary @@ -78,7 +88,7 @@ func restoreVCluster(ctx context.Context, kubeClient *kubernetes.Clientset, rest // before returning. If both the restore and restart fail, the returned error retains // both failures. The CLI must run on the same host as the standalone installation // because it needs filesystem access to the binary and config. -func restoreStandaloneVCluster(ctx context.Context, vCluster *find.VCluster, snapshotOpts *snapshot.Options, cmdArgs []string, log log.Logger) (retErr error) { +func restoreStandaloneVCluster(snapshotOpts *snapshot.Options, cmdArgs []string, log log.Logger) (retErr error) { vClusterConfig, err := vclusterconfig.LoadStandaloneConfig("", nil) if err != nil { return fmt.Errorf("load standalone config: %w", err) diff --git a/pkg/cli/snapshot_helm.go b/pkg/cli/snapshot_helm.go index 8cd755a6b4..2bcbea3392 100644 --- a/pkg/cli/snapshot_helm.go +++ b/pkg/cli/snapshot_helm.go @@ -61,7 +61,7 @@ func CreateSnapshot(ctx context.Context, args []string, globalFlags *flags.Globa } // create the snapshot request which will be reconciled by the vCluster controller - err = createSnapshotRequest(ctx, vCluster, kubeClient, snapshotOpts, log) + err = createSnapshotRequest(ctx, vCluster, kubeClient, snapshotOpts, log, restConfig) if err != nil { return err } @@ -162,7 +162,7 @@ func initSnapshotCommand( return vCluster, kubeClient, restClient, nil } -func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeClient *kubernetes.Clientset, snapshotOpts *snapshot.Options, log log.Logger) error { +func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeClient *kubernetes.Clientset, snapshotOpts *snapshot.Options, log log.Logger, restConfig *rest.Config) error { err := checkIfVClusterSupportsSnapshotRequests(vCluster, log) if err != nil { return fmt.Errorf("vCluster version check failed: %w", err) @@ -171,6 +171,11 @@ func createSnapshotRequest(ctx context.Context, vCluster *find.VCluster, kubeCli if err != nil { return fmt.Errorf("failed to get vcluster config: %w", err) } + + if snapshotOpts.Type == "file" { + return snapshotToLocalFile(ctx, vCluster, kubeClient, restConfig, snapshotOpts, log, vClusterConfig) + } + // Create snapshot request resources _, err = snapshot.CreateSnapshotRequestResources(ctx, vCluster.Namespace, vClusterConfig.Name, vClusterConfig, snapshotOpts, kubeClient) if err != nil { diff --git a/pkg/cli/snapshot_local.go b/pkg/cli/snapshot_local.go new file mode 100644 index 0000000000..bf02f563ee --- /dev/null +++ b/pkg/cli/snapshot_local.go @@ -0,0 +1,165 @@ +package cli + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/loft-sh/log" + vclusterconfig "github.com/loft-sh/vcluster/pkg/config" + "github.com/loft-sh/vcluster/pkg/snapshot" + "github.com/loft-sh/vcluster/pkg/snapshot/pod" + "github.com/loft-sh/vcluster/pkg/util/podhelper" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/loft-sh/vcluster/pkg/cli/find" +) + +// dataMountPath is where the vCluster data PVC is mounted in the syncer container. +const dataMountPath = "/data" + +func snapshotToLocalFile(ctx context.Context, vCluster *find.VCluster, + kubeClient *kubernetes.Clientset, restConfig *rest.Config, + snapshotOpts *snapshot.Options, log log.Logger, vClusterConfig *vclusterconfig.VirtualClusterConfig) error { + tempPath := fmt.Sprintf("%s/vcluster-snapshot-%d.tar.gz", dataMountPath, time.Now().Unix()) + localPath := snapshotOpts.File.Path + if !vCluster.IsStandalone { + // For non-standalone, we need to write the snapshot to the syncer PVC first, then download it via exec. + snapshotOpts.File.Path = tempPath + } + + log.Infof("Creating snapshot request...") + snapshotRequest, err := snapshot.CreateSnapshotRequestResources( + ctx, vCluster.Namespace, vClusterConfig.Name, vClusterConfig, snapshotOpts, kubeClient) + if err != nil { + return fmt.Errorf("create snapshot request: %w", err) + } + + log.Infof("Waiting for snapshot to complete...") + if err := waitForSnapshotRequest(ctx, kubeClient, vCluster.Namespace, snapshotRequest.Name); err != nil { + return err + } + + if vCluster.IsStandalone { + // The file backend writes with 0600 already; chmod is a no-op but kept for safety. + _ = os.Chmod(localPath, 0600) + log.Infof("Snapshot saved to %s", localPath) + return nil + } + + targetPod, err := findVClusterPod(vCluster) + if err != nil { + return err + } + + f, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) + if err != nil { + return fmt.Errorf("create local file %s: %w", localPath, err) + } + defer f.Close() + + log.Infof("Downloading snapshot from pod %s to %s...", targetPod.Name, localPath) + if err := podhelper.ExecStream(ctx, restConfig, &podhelper.ExecStreamOptions{ + Pod: targetPod.Name, + Namespace: vCluster.Namespace, + Container: "syncer", + Command: []string{"cat", tempPath}, + Stdout: f, + Stderr: os.Stderr, + }); err != nil { + _ = os.Remove(localPath) + return fmt.Errorf("download snapshot from pod: %w", err) + } + + if _, _, err := podhelper.ExecBuffered(ctx, restConfig, vCluster.Namespace, + targetPod.Name, "syncer", []string{"rm", "-f", tempPath}, nil); err != nil { + log.Warnf("Failed to remove temp snapshot file %s from pod: %v", tempPath, err) + } + + log.Infof("Snapshot saved to %s", localPath) + return nil +} + +func restoreFromLocalFile(ctx context.Context, vCluster *find.VCluster, + kubeClient *kubernetes.Clientset, restConfig *rest.Config, + snapshotOpts *snapshot.Options, podOpts *pod.Options, + log log.Logger, cmdArgs []string) error { + tempPath := fmt.Sprintf("%s/vcluster-restore-%d.tar.gz", dataMountPath, time.Now().Unix()) + localPath := snapshotOpts.File.Path + if _, err := os.Stat(localPath); os.IsNotExist(err) { + return fmt.Errorf("snapshot file not found: %s", localPath) + } + + if vCluster.IsStandalone { + // For standalone, we can read directly from the local file instead of going through the syncer PVC. + return restoreStandaloneVCluster(snapshotOpts, cmdArgs, log) + } + + // For non-standalone, we need to upload the local file into the syncer container first, then point the restore command at it. + snapshotOpts.File.Path = tempPath + + // Stream the local file into the syncer PVC via exec before pausing. + // The PVC (and the staged file) persist through scale-to-zero. + targetPod, err := findVClusterPod(vCluster) + if err != nil { + return err + } + + f, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("open local snapshot %s: %w", localPath, err) + } + defer f.Close() + + log.Infof("Uploading %s to pod %s at %s...", localPath, targetPod.Name, tempPath) + if err := podhelper.ExecStream(ctx, restConfig, &podhelper.ExecStreamOptions{ + Pod: targetPod.Name, + Namespace: vCluster.Namespace, + Container: "syncer", + Command: []string{"/bin/sh", "-c", "cat > " + tempPath}, + Stdin: f, + Stdout: os.Stdout, + Stderr: os.Stderr, + }); err != nil { + return fmt.Errorf("upload snapshot to pod: %w", err) + } + + return runRestorePod(ctx, kubeClient, restConfig, vCluster, snapshotOpts, podOpts, log, cmdArgs) +} + +func waitForSnapshotRequest(ctx context.Context, kubeClient *kubernetes.Clientset, + namespace, name string) error { + return wait.PollUntilContextTimeout(ctx, 5*time.Second, 30*time.Minute, true, + func(ctx context.Context) (bool, error) { + cm, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("get snapshot request ConfigMap: %w", err) + } + req, err := snapshot.UnmarshalSnapshotRequest(cm) + if err != nil { + return false, fmt.Errorf("unmarshal snapshot request: %w", err) + } + if req.Done() { + if req.Status.Phase == snapshot.RequestPhaseCompleted { + return true, nil + } + return false, fmt.Errorf("snapshot %s: %s", req.Status.Phase, req.Status.Error.Message) + } + return false, nil + }) +} + +func findVClusterPod(vCluster *find.VCluster) (*corev1.Pod, error) { + for i := range vCluster.Pods { + p := &vCluster.Pods[i] + if (vCluster.StatefulSet != nil || vCluster.Deployment != nil) && len(p.Name) > 0 { + return p, nil + } + } + return nil, fmt.Errorf("no running pod found for vCluster %s", vCluster.Name) +} diff --git a/pkg/snapshot/file/store.go b/pkg/snapshot/file/store.go new file mode 100644 index 0000000000..43dccc5630 --- /dev/null +++ b/pkg/snapshot/file/store.go @@ -0,0 +1,96 @@ +package file + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/loft-sh/vcluster/pkg/snapshot/types" +) + +type Options struct { + Path string `json:"path,omitempty"` +} + +func NewStore(options *Options) *Store { + return &Store{path: options.Path} +} + +type Store struct { + path string +} + +func (s *Store) Target() string { + return "file://" + s.path +} + +func (s *Store) GetObject(_ context.Context) (io.ReadCloser, error) { + return os.Open(s.path) +} + +func (s *Store) PutObject(_ context.Context, body io.Reader) error { + if err := os.MkdirAll(filepath.Dir(s.path), 0755); err != nil { + return err + } + f, err := os.OpenFile(s.path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) + if err != nil { + return err + } + defer f.Close() + _, err = io.Copy(f, body) + return err +} + +func (s *Store) List(_ context.Context) ([]types.Snapshot, error) { + path := s.path + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + if !fi.IsDir() { + path = filepath.Dir(path) + } + + entries, err := os.ReadDir(path) + if err != nil { + return nil, err + } + + var snapshots []types.Snapshot + for _, entry := range entries { + info, err := entry.Info() + if err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + return nil, err + } + if info.IsDir() || !strings.HasSuffix(entry.Name(), ".tar.gz") { + continue + } + snapshots = append(snapshots, types.Snapshot{ + ID: entry.Name(), + URL: "file://" + path + "/" + entry.Name(), + Timestamp: info.ModTime(), + }) + } + return snapshots, nil +} + +func (s *Store) Delete(_ context.Context) error { + fi, err := os.Stat(s.path) + if err != nil { + return err + } + if fi.IsDir() { + return fmt.Errorf("not a snapshot file") + } + if !strings.HasSuffix(s.path, ".tar.gz") { + return fmt.Errorf("not a snapshot file") + } + return os.Remove(s.path) +} diff --git a/pkg/snapshot/options.go b/pkg/snapshot/options.go index 0b3b9f5247..d63cf7541e 100644 --- a/pkg/snapshot/options.go +++ b/pkg/snapshot/options.go @@ -14,6 +14,7 @@ import ( "github.com/loft-sh/vcluster/pkg/constants" "github.com/loft-sh/vcluster/pkg/snapshot/azure" "github.com/loft-sh/vcluster/pkg/snapshot/container" + "github.com/loft-sh/vcluster/pkg/snapshot/file" "github.com/loft-sh/vcluster/pkg/snapshot/oci" "github.com/loft-sh/vcluster/pkg/snapshot/options" "github.com/loft-sh/vcluster/pkg/snapshot/s3" @@ -32,6 +33,7 @@ type Options struct { Container container.Options `json:"container"` OCI oci.Options `json:"oci"` Azure azure.Options `json:"azure"` + File file.Options `json:"file"` Release *HelmRelease `json:"release,omitempty"` IncludeVolumes bool `json:"include-volumes,omitempty"` @@ -52,6 +54,8 @@ func (o *Options) GetURL() string { snapshotURL = "oci://" + o.OCI.Repository case "azure": snapshotURL = o.Azure.BlobURL + case "file": + snapshotURL = "file://" + o.File.Path } return snapshotURL @@ -101,7 +105,7 @@ func Parse(snapshotURL string, snapshotOptions *Options) error { return fmt.Errorf("error parsing snapshotURL %s: %w", snapshotURL, err) } - supportedSchemes := []string{"oci", "s3", "container", "https"} + supportedSchemes := []string{"oci", "s3", "container", "file", "https"} if !slices.Contains(supportedSchemes, parsedURL.Scheme) { return fmt.Errorf("scheme needs to be one of %s", strings.Join(supportedSchemes, ", ")) } @@ -151,6 +155,12 @@ func Parse(snapshotURL string, snapshotOptions *Options) error { // Azure blob storage support snapshotOptions.Type = "azure" snapshotOptions.Azure.BlobURL = snapshotURL + case "file": + // file:///absolute/path — host is empty for absolute paths + if parsedURL.Path == "" { + return fmt.Errorf("path must be specified via file:///PATH") + } + snapshotOptions.File.Path = parsedURL.Path } return nil @@ -197,8 +207,12 @@ func Validate(options *Options, isList bool) error { if options.Azure.BlobURL == "" { return fmt.Errorf("blob URL must be specified") } + } else if options.Type == "file" { + if options.File.Path == "" { + return fmt.Errorf("path must be specified via file:///PATH") + } } else { - return fmt.Errorf("type must be either 'container', 'oci', 's3', or 'azure'") + return fmt.Errorf("type must be either 'container', 'file', 'oci', 's3', or 'azure'") } return nil diff --git a/pkg/snapshot/storagefactory.go b/pkg/snapshot/storagefactory.go index e0eeaf602d..89477966dc 100644 --- a/pkg/snapshot/storagefactory.go +++ b/pkg/snapshot/storagefactory.go @@ -6,6 +6,7 @@ import ( "github.com/loft-sh/vcluster/pkg/snapshot/azure" "github.com/loft-sh/vcluster/pkg/snapshot/container" + "github.com/loft-sh/vcluster/pkg/snapshot/file" "github.com/loft-sh/vcluster/pkg/snapshot/oci" "github.com/loft-sh/vcluster/pkg/snapshot/s3" "github.com/loft-sh/vcluster/pkg/snapshot/types" @@ -23,6 +24,8 @@ func CreateStore(ctx context.Context, options *Options) (types.Storage, error) { return objectStore, nil } else if options.Type == "container" { return container.NewStore(&options.Container), nil + } else if options.Type == "file" { + return file.NewStore(&options.File), nil } else if options.Type == "oci" { return oci.NewStore(&options.OCI), nil } else if options.Type == "azure" {