diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp
index 779ba90c67..934a05202e 100644
--- a/src/core/ndd.hpp
+++ b/src/core/ndd.hpp
@@ -1949,8 +1949,8 @@ inline void IndexManager::executeBackupJob(const std::string& index_id, const st
     std::string user_temp_dir = backup_store_.getUserTempDir(username);
     std::filesystem::create_directories(user_temp_dir);
     std::string source_dir = data_dir_ + "/" + index_id;
-    std::string backup_tar_final = user_backup_dir + "/" + backup_name + ".tar";
-    std::string backup_tar_temp = user_temp_dir + "/.tmp_" + backup_name + ".tar";
+    std::string backup_tar_final = user_backup_dir + "/" + backup_name + ".tar.zst";
+    std::string backup_tar_temp = user_temp_dir + "/.tmp_" + backup_name + ".tar.zst";
 
     if(std::filesystem::exists(backup_tar_final)) {
         throw std::runtime_error("Backup already exists: " + backup_name);
@@ -2070,8 +2070,8 @@ inline void IndexManager::executeBackupJob(const std::string& index_id, const st
     std::string user_backup_dir = backup_store_.getUserBackupDir(username);
     std::string user_temp_dir = backup_store_.getUserTempDir(username);
     std::string source_dir = data_dir_ + "/" + index_id;
-    std::string backup_tar_final = user_backup_dir + "/" + backup_name + ".tar";
-    std::string backup_tar_temp = user_temp_dir + "/.tmp_" + backup_name + ".tar";
+    std::string backup_tar_final = user_backup_dir + "/" + backup_name + ".tar.zst";
+    std::string backup_tar_temp = user_temp_dir + "/.tmp_" + backup_name + ".tar.zst";
     std::string metadata_file_in_index = source_dir + "/metadata.json";
 
     if(std::filesystem::exists(backup_tar_temp)) {
@@ -2099,7 +2099,7 @@ inline std::pair IndexManager::restoreBackup(const std::strin
     }
 
     std::string backup_dir_root = backup_store_.getUserBackupDir(username);
-    std::string backup_tar = backup_dir_root + "/" + backup_name + ".tar";
+    std::string backup_tar = backup_dir_root + "/" + backup_name + ".tar.zst";
     std::string user_temp_dir = backup_store_.getUserTempDir(username);
     std::filesystem::create_directories(user_temp_dir);
     std::string backup_extract_dir = user_temp_dir + "/" + backup_name;
@@ -2202,7 +2202,7 @@ inline std::pair IndexManager::createBackupAsync(const std::s
     std::string user_backup_dir = backup_store_.getUserBackupDir(username);
     std::filesystem::create_directories(user_backup_dir);
 
-    std::string backup_tar = user_backup_dir + "/" + backup_name + ".tar";
+    std::string backup_tar = user_backup_dir + "/" + backup_name + ".tar.zst";
     if (std::filesystem::exists(backup_tar)) {
         return {false, "Backup already exists: " + backup_name};
     }
diff --git a/src/main.cpp b/src/main.cpp
index 9080f1738c..21c6a9f31b 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -564,7 +564,7 @@ int main(int argc, char** argv) {
             }
 
             std::string backup_file =
-                settings::DATA_DIR + "/backups/" + settings::DEFAULT_USERNAME + "/" + backup_name + ".tar";
+                settings::DATA_DIR + "/backups/" + settings::DEFAULT_USERNAME + "/" + backup_name + ".tar.zst";
             if(!std::filesystem::exists(backup_file)) {
                 LOG_WARN(1058, settings::DEFAULT_USERNAME,
                          "Backup download requested for missing backup " << backup_name);
@@ -574,7 +574,7 @@ int main(int argc, char** argv) {
 
             crow::response response;
             response.set_static_file_info_unsafe(backup_file);
-            response.set_header("Content-Type", "application/x-tar");
+            response.set_header("Content-Type", "application/zstd");
             response.set_header("Content-Disposition",
-                                "attachment; filename=\"" + backup_name + ".tar\"");
+                                "attachment; filename=\"" + backup_name + ".tar.zst\"");
             response.set_header("Cache-Control", "no-cache");
@@ -608,11 +608,11 @@ int main(int argc, char** argv) {
                     if(content_disposition.params.count("filename")) {
                         backup_name = content_disposition.params.at("filename");
-                        // check if backup name ends with .tar
-                        if(backup_name.ends_with(".tar")) {
-                            backup_name = backup_name.substr(0, backup_name.size() - 4);
+                        // check if backup name ends with .tar.zst
+                        if(backup_name.ends_with(".tar.zst")) {
+                            backup_name = backup_name.substr(0, backup_name.size() - 8);
                         } else {
                             LOG_WARN(1059, ctx.username, "Backup upload used invalid file extension");
-                            return json_error(400, "Invalid backup file extension. Expected .tar file");
+                            return json_error(400, "Invalid backup file extension. Expected .tar.zst file");
                         }
                     }
                     file_content = part.body;
@@ -641,7 +641,7 @@ int main(int argc, char** argv) {
             // Check if backup already exists
             std::string user_backup_dir = settings::DATA_DIR + "/backups/" + ctx.username;
             std::filesystem::create_directories(user_backup_dir);
-            std::string backup_path = user_backup_dir + "/" + backup_name + ".tar";
+            std::string backup_path = user_backup_dir + "/" + backup_name + ".tar.zst";
             if(std::filesystem::exists(backup_path)) {
                 LOG_WARN(1063, ctx.username, "Backup upload conflicts with existing backup " << backup_name);
                 return json_error(409,
diff --git a/src/storage/backup_store.hpp b/src/storage/backup_store.hpp
index 45fea9ec4c..22a884d4e9 100644
--- a/src/storage/backup_store.hpp
+++ b/src/storage/backup_store.hpp
@@ -14,6 +14,11 @@
 #include
 #include
 
+#include <cerrno>
+#include
+#include
+#include
+
 #include "json/nlohmann_json.hpp"
 #include "index_meta.hpp"
 #include "settings.hpp"
@@ -31,6 +36,123 @@ class BackupStore {
     std::unordered_map active_user_backups_;
     mutable std::mutex active_user_backups_mutex_;
 
+    // Streams one regular file into the archive as entry `e`.
+    //
+    // The file's data extents are found with SEEK_DATA/SEEK_HOLE and registered on the
+    // entry as a sparse map before the header is written. libarchive's pax writer then
+    // expects the logical byte stream (holes included) through archive_write_data() and
+    // drops the hole ranges itself, so holes are fed from a zero block without touching
+    // the disk and only data extents are actually read.
+    //
+    // Returns false and fills error_msg on failure; the caller keeps ownership of a and e.
+    bool writeFileToArchive(struct archive* a, struct archive_entry* e,
+                            const std::filesystem::path& file_path,
+                            std::string& error_msg) {
+        const int fd = open(file_path.c_str(), O_RDONLY);
+        if (fd < 0) {
+            error_msg = "Failed to open file: " + file_path.string();
+            return false;
+        }
+        // Closes the descriptor on every exit path.
+        struct FdGuard {
+            int fd;
+            ~FdGuard() { ::close(fd); }
+        } fd_guard{fd};
+
+        // Size of the descriptor we actually opened.
+        const off_t file_size = lseek(fd, 0, SEEK_END);
+        if (file_size < 0) {
+            error_msg = "Failed to determine file size: " + file_path.string();
+            return false;
+        }
+        archive_entry_set_size(e, file_size);
+
+        // Locates the next data extent at or after from; false means the rest is a hole.
+        const auto next_extent = [fd, file_size](off_t from, off_t& begin, off_t& end) {
+            begin = lseek(fd, from, SEEK_DATA);
+            if (begin < 0) {
+                if (errno == ENXIO) {
+                    return false;  // only a hole remains up to EOF
+                }
+                // SEEK_DATA unavailable here: archive the remainder as plain data.
+                begin = from;
+                end = file_size;
+                return true;
+            }
+            if (begin >= file_size) {
+                return false;
+            }
+            end = lseek(fd, begin, SEEK_HOLE);
+            if (end <= begin || end > file_size) {
+                end = file_size;  // also guarantees forward progress
+            }
+            return true;
+        };
+
+        const auto set_archive_error = [&]() {
+            const char* msg = archive_error_string(a);
+            error_msg = (msg != nullptr) ? msg : "Unknown archive error";
+        };
+
+        // Pass 1: register the data extents; the sparse map is part of the header.
+        for (off_t pos = 0, begin = 0, end = 0;
+             pos < file_size && next_extent(pos, begin, end); pos = end) {
+            archive_entry_sparse_add_entry(e, begin, end - begin);
+        }
+
+        if (archive_write_header(a, e) != ARCHIVE_OK) {
+            set_archive_error();
+            return false;
+        }
+
+        // Pass 2: feed the logical stream, zeros for holes and file bytes for data.
+        static const char zero_block[65536] = {};
+        char buffer[262144];  // 256KB — one IOP on GP3/GCP PD
+        off_t pos = 0;
+        while (pos < file_size) {
+            off_t begin = 0;
+            off_t end = 0;
+            if (!next_extent(pos, begin, end)) {
+                begin = end = file_size;  // trailing hole
+            }
+
+            while (pos < begin) {
+                const off_t left = begin - pos;
+                const size_t chunk = left < static_cast<off_t>(sizeof(zero_block))
+                                         ? static_cast<size_t>(left)
+                                         : sizeof(zero_block);
+                if (archive_write_data(a, zero_block, chunk) < 0) {
+                    set_archive_error();
+                    return false;
+                }
+                pos += static_cast<off_t>(chunk);
+            }
+
+            while (pos < end) {
+                const off_t left = end - pos;
+                const size_t want = left < static_cast<off_t>(sizeof(buffer))
+                                        ? static_cast<size_t>(left)
+                                        : sizeof(buffer);
+                const ssize_t got = pread(fd, buffer, want, pos);
+                if (got < 0 && errno == EINTR) {
+                    continue;
+                }
+                if (got <= 0) {
+                    // A read error, or the file shrank underneath us.
+                    error_msg = "Failed to read file: " + file_path.string();
+                    return false;
+                }
+                if (archive_write_data(a, buffer, static_cast<size_t>(got)) < 0) {
+                    set_archive_error();
+                    return false;
+                }
+                pos += got;
+            }
+        }
+
+        return true;
+    }
+
 public:
     BackupStore(const std::string& data_dir) :
         data_dir_(data_dir) {
@@ -46,6 +168,7 @@ class BackupStore {
                        std::stop_token st = {}) {
         struct archive* a = archive_write_new();
         archive_write_set_format_pax_restricted(a);
+        archive_write_add_filter_zstd(a);
 
         if(archive_write_open_filename(a, archive_path.string().c_str()) != ARCHIVE_OK) {
             error_msg = archive_error_string(a);
@@ -67,24 +190,16 @@ class BackupStore {
                 std::filesystem::path rel_path =
                         std::filesystem::relative(entry.path(), source_dir.parent_path());
                 archive_entry_set_pathname(e, rel_path.string().c_str());
-                archive_entry_set_size(e, std::filesystem::file_size(entry.path()));
                 archive_entry_set_filetype(e, AE_IFREG);
                 archive_entry_set_perm(e, 0644);
 
-                if(archive_write_header(a, e) != ARCHIVE_OK) {
-                    error_msg = archive_error_string(a);
+                if(!writeFileToArchive(a, e, entry.path(), error_msg)) {
                     archive_entry_free(e);
                     archive_write_close(a);
                     archive_write_free(a);
                     return false;
                 }
 
-                std::ifstream file(entry.path(), std::ios::binary);
-                char buffer[8192];
-                while(file.read(buffer, sizeof(buffer)) || file.gcount() > 0) {
-                    archive_write_data(a, buffer, file.gcount());
-                }
-                file.close();
                 archive_entry_free(e);
             }
         }
@@ -103,7 +218,7 @@ class BackupStore {
         archive_read_support_format_all(a);
         archive_read_support_filter_all(a);
 
-        archive_write_disk_set_options(ext, ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM);
+        archive_write_disk_set_options(ext, ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_SPARSE);
         archive_write_disk_set_standard_lookup(ext);
 
         if(archive_read_open_filename(a, archive_path.string().c_str(), 10240) != ARCHIVE_OK) {
@@ -274,7 +389,7 @@ class BackupStore {
             return result;
         }
 
-        std::string backup_tar = getUserBackupDir(username) + "/" + backup_name + ".tar";
+        std::string backup_tar = getUserBackupDir(username) + "/" + backup_name + ".tar.zst";
 
         if(std::filesystem::exists(backup_tar)) {
             std::filesystem::remove(backup_tar);