diff --git a/cake-core/src/cake/sharding/discovery.rs b/cake-core/src/cake/sharding/discovery.rs index 469e52cd..e3d74e7d 100644 --- a/cake-core/src/cake/sharding/discovery.rs +++ b/cake-core/src/cake/sharding/discovery.rs @@ -918,6 +918,22 @@ mod tests { assert_eq!(w.max_layers_for_size(layer_size), expected); } + #[test] + fn test_max_layers_apple_mobile_defaults() { + // iPad/iPhone workers use the mobile reserve and cap instead of desktop unified-memory rules. + let vram = 8u64 * 1024 * 1024 * 1024; + let w = make_worker(vec![GpuInfo { + name: "iPad Air".into(), + vram_bytes: vram, + tflops: 3.0, + }]); + let layer_size = 500u64 * 1024 * 1024; + let reserve = (vram as f64 * 0.80) as u64; + let usable = vram.saturating_sub(reserve).min(1536u64 * 1024 * 1024); + let expected = (usable / layer_size) as usize; + assert_eq!(w.max_layers_for_size(layer_size), expected); + } + #[test] fn test_max_layers_cpu() { // CPU device: 20% reserve diff --git a/cake-core/src/cake/sharding/mod.rs b/cake-core/src/cake/sharding/mod.rs index f761e30e..ceb68364 100644 --- a/cake-core/src/cake/sharding/mod.rs +++ b/cake-core/src/cake/sharding/mod.rs @@ -64,24 +64,57 @@ pub trait WorkerCapacity { fn max_layers_for_size(&self, layer_size_bytes: u64) -> usize; } +const DEFAULT_MOBILE_LAYER_BUDGET_MB: u64 = 1536; +const DEFAULT_MOBILE_RESERVE_PCT: u64 = 80; + +fn mobile_layer_budget_bytes() -> u64 { + std::env::var("CAKE_MOBILE_LAYER_BUDGET_MB") + .ok() + .and_then(|v| v.parse::<u64>().ok()) + .unwrap_or(DEFAULT_MOBILE_LAYER_BUDGET_MB) + * 1024 + * 1024 +} + +fn mobile_reserve_pct() -> f64 { + std::env::var("CAKE_MOBILE_RESERVE_PCT") + .ok() + .and_then(|v| v.parse::<u64>().ok()) + .unwrap_or(DEFAULT_MOBILE_RESERVE_PCT) as f64 + / 100.0 +} + /// Compute max layers from a list of GPUs, applying per-device VRAM reserves. 
/// /// - **Dedicated VRAM (CUDA)**: reserve max(5%, 768 MiB) -/// - **Unified memory (Apple Silicon)**: reserve max(28%, 6 GiB) +/// - **Apple desktop unified memory**: reserve max(28%, 6 GiB) +/// - **Apple mobile unified memory (iPhone/iPad)**: reserve configurable % +/// (default 80%, override via `CAKE_MOBILE_RESERVE_PCT`), then cap the +/// worker layer budget (default 1.5 GiB, override via +/// `CAKE_MOBILE_LAYER_BUDGET_MB`) to stay under iOS per-process jetsam limits /// - **CPU / mobile**: reserve 20% pub fn max_layers_for_gpus(gpus: &[discovery::GpuInfo], layer_size_bytes: u64) -> usize { if layer_size_bytes == 0 || gpus.is_empty() { return usize::MAX; } + let mobile_cap = mobile_layer_budget_bytes(); + let mobile_reserve = mobile_reserve_pct(); gpus.iter() .map(|g| { let name_lower = g.name.to_lowercase(); let is_cpu = name_lower.starts_with("cpu"); - let is_unified = name_lower.contains("apple"); + let is_apple_mobile = + name_lower.starts_with("iphone") || name_lower.starts_with("ipad"); + let is_apple_desktop = name_lower.contains("apple"); let usable = if is_cpu { let reserve = (g.vram_bytes as f64 * 0.20) as u64; g.vram_bytes.saturating_sub(reserve) - } else if is_unified { + } else if is_apple_mobile { + let reserve = (g.vram_bytes as f64 * mobile_reserve) as u64; + g.vram_bytes + .saturating_sub(reserve) + .min(mobile_cap) + } else if is_apple_desktop { let min_reserve = 6u64 * 1024 * 1024 * 1024; let pct_reserve = (g.vram_bytes as f64 * 0.28) as u64; let os_reserve = pct_reserve.max(min_reserve); diff --git a/cake-mobile-app/shared/src/androidMain/kotlin/com/evilsocket/cake/WorkerBridge.android.kt b/cake-mobile-app/shared/src/androidMain/kotlin/com/evilsocket/cake/WorkerBridge.android.kt index 7849b59c..46e89fa9 100644 --- a/cake-mobile-app/shared/src/androidMain/kotlin/com/evilsocket/cake/WorkerBridge.android.kt +++ b/cake-mobile-app/shared/src/androidMain/kotlin/com/evilsocket/cake/WorkerBridge.android.kt @@ -9,4 +9,7 @@ actual object 
WorkerBridge { actual fun getWorkerStatus(): String = uniffi.cake_mobile.getWorkerStatus() actual fun setCacheDir(path: String) = uniffi.cake_mobile.setCacheDir(path) + + actual fun configureMobileLimits(budgetMb: UInt, reservePct: UInt) = + uniffi.cake_mobile.configureMobileLimits(budgetMb, reservePct) } diff --git a/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerBridge.kt b/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerBridge.kt index e14ac774..da284901 100644 --- a/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerBridge.kt +++ b/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerBridge.kt @@ -5,4 +5,5 @@ expect object WorkerBridge { fun stopWorker() fun getWorkerStatus(): String fun setCacheDir(path: String) + fun configureMobileLimits(budgetMb: UInt, reservePct: UInt) } diff --git a/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerViewModel.kt b/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerViewModel.kt index 7e72b68c..479ddc91 100644 --- a/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerViewModel.kt +++ b/cake-mobile-app/shared/src/commonMain/kotlin/com/evilsocket/cake/WorkerViewModel.kt @@ -28,6 +28,8 @@ class WorkerViewModel(private val settings: PlatformSettings) { var workerName by mutableStateOf(settings.getString("worker_name", "My Phone")) var modelName by mutableStateOf(settings.getString("model_name", "Qwen/Qwen3.5-0.8B")) var clusterKey by mutableStateOf(settings.getString("cluster_key", "")) + private val layerBudgetMb = settings.getString("layer_budget_mb", "1536").toUIntOrNull() ?: 1536u + private val reservePct = settings.getString("reserve_pct", "80").toUIntOrNull() ?: 80u fun saveSettings() { settings.setString("worker_name", workerName) @@ -52,6 +54,7 @@ class WorkerViewModel(private val settings: PlatformSettings) { } } + WorkerBridge.configureMobileLimits(layerBudgetMb, reservePct) 
val result = WorkerBridge.startWorker(workerName, modelName, clusterKey) pollJob.cancel() diff --git a/cake-mobile-app/shared/src/iosMain/cinterop/cake_mobile_c.h b/cake-mobile-app/shared/src/iosMain/cinterop/cake_mobile_c.h index 7d5b3842..810d411f 100644 --- a/cake-mobile-app/shared/src/iosMain/cinterop/cake_mobile_c.h +++ b/cake-mobile-app/shared/src/iosMain/cinterop/cake_mobile_c.h @@ -16,5 +16,10 @@ char* cake_get_worker_status(void); // Set the HuggingFace cache directory (no-op on iOS; used on Android). void cake_set_cache_dir(const char* path); +// Set iOS jetsam-aware memory limits. Call before cake_start_worker. +// budget_mb: max layer budget in MiB (default 1536). +// reserve_pct: percentage of device RAM reserved for OS (default 80). +void cake_configure_mobile_limits(unsigned int budget_mb, unsigned int reserve_pct); + // Free a string returned by the above functions. void cake_free_string(char* s); diff --git a/cake-mobile-app/shared/src/iosMain/kotlin/com/evilsocket/cake/WorkerBridge.ios.kt b/cake-mobile-app/shared/src/iosMain/kotlin/com/evilsocket/cake/WorkerBridge.ios.kt index 4e9f6af0..a38783e5 100644 --- a/cake-mobile-app/shared/src/iosMain/kotlin/com/evilsocket/cake/WorkerBridge.ios.kt +++ b/cake-mobile-app/shared/src/iosMain/kotlin/com/evilsocket/cake/WorkerBridge.ios.kt @@ -1,5 +1,6 @@ package com.evilsocket.cake +import cake_mobile.cake_configure_mobile_limits import cake_mobile.cake_free_string import cake_mobile.cake_get_worker_status import cake_mobile.cake_set_cache_dir @@ -32,4 +33,8 @@ actual object WorkerBridge { actual fun setCacheDir(path: String) { cake_set_cache_dir(path) } + + actual fun configureMobileLimits(budgetMb: UInt, reservePct: UInt) { + cake_configure_mobile_limits(budgetMb, reservePct) + } } diff --git a/cake-mobile/src/lib.rs b/cake-mobile/src/lib.rs index 5a33b646..f5263190 100644 --- a/cake-mobile/src/lib.rs +++ b/cake-mobile/src/lib.rs @@ -75,6 +75,19 @@ pub fn get_worker_status() -> String { #[cfg(target_os = 
"android")] static ANDROID_CACHE_DIR: Mutex<String> = Mutex::new(String::new()); +/// Set iOS jetsam-aware memory limits before start_worker. +/// `budget_mb`: max layer budget in MiB (default 1536). Set higher for Guided Access / single-app. +/// `reserve_pct`: percentage of device RAM reserved for OS (default 80). Lower = more layers. +#[uniffi::export] +pub fn configure_mobile_limits(budget_mb: u32, reserve_pct: u32) { + log_mobile(&format!( + "[cake-mobile] configure_mobile_limits: budget={}MB reserve={}%", + budget_mb, reserve_pct + )); + std::env::set_var("CAKE_MOBILE_LAYER_BUDGET_MB", budget_mb.to_string()); + std::env::set_var("CAKE_MOBILE_RESERVE_PCT", reserve_pct.to_string()); +} + /// On Android, call this with the app's cacheDir path before start_worker. /// No-op on iOS (sandbox paths are determined automatically). #[uniffi::export] @@ -504,6 +517,11 @@ pub extern "C" fn cake_get_worker_status() -> *mut c_char { CString::new(get_worker_status()).unwrap_or_default().into_raw() } +#[no_mangle] +pub extern "C" fn cake_configure_mobile_limits(budget_mb: u32, reserve_pct: u32) { + configure_mobile_limits(budget_mb, reserve_pct); +} + /// # Safety /// `path` must be a valid, non-null, NUL-terminated C string. #[no_mangle]