Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cake-core/src/cake/sharding/discovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,22 @@ mod tests {
assert_eq!(w.max_layers_for_size(layer_size), expected);
}

#[test]
fn test_max_layers_apple_mobile_defaults() {
    // A worker whose GPU is named like an iPad/iPhone is sized with the mobile
    // reserve and budget cap rather than the desktop unified-memory rules.
    const MIB: u64 = 1024 * 1024;
    let total_memory = 8u64 * 1024 * MIB;
    let gpu = GpuInfo {
        name: "iPad Air".into(),
        vram_bytes: total_memory,
        tflops: 3.0,
    };
    let worker = make_worker(vec![gpu]);
    let layer_size = 500u64 * MIB;

    // Expected value under the defaults: hold back 80% of memory, then cap
    // whatever is left at 1536 MiB before dividing by the layer size.
    let held_back = (total_memory as f64 * 0.80) as u64;
    let budget = std::cmp::min(total_memory.saturating_sub(held_back), 1536u64 * MIB);
    let expected = (budget / layer_size) as usize;

    assert_eq!(worker.max_layers_for_size(layer_size), expected);
}

#[test]
fn test_max_layers_cpu() {
// CPU device: 20% reserve
Expand Down
39 changes: 36 additions & 3 deletions cake-core/src/cake/sharding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,24 +64,57 @@ pub trait WorkerCapacity {
fn max_layers_for_size(&self, layer_size_bytes: u64) -> usize;
}

/// Default cap on the layer budget of an Apple mobile worker, in MiB.
const DEFAULT_MOBILE_LAYER_BUDGET_MB: u64 = 1536;
/// Default share of device memory, in percent, held back on Apple mobile workers.
const DEFAULT_MOBILE_RESERVE_PCT: u64 = 80;

/// Reads the environment variable `var` and parses it as a `u64`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as an unsigned integer, so callers can fall back to a default.
fn mobile_env_u64(var: &str) -> Option<u64> {
    std::env::var(var).ok()?.parse().ok()
}

/// Returns the mobile layer budget cap in bytes.
///
/// The value comes from `CAKE_MOBILE_LAYER_BUDGET_MB` (MiB) when it is set to
/// a valid unsigned integer, otherwise from `DEFAULT_MOBILE_LAYER_BUDGET_MB`.
/// The MiB-to-bytes conversion saturates at `u64::MAX` instead of overflowing
/// when the override is absurdly large.
fn mobile_layer_budget_bytes() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    mobile_env_u64("CAKE_MOBILE_LAYER_BUDGET_MB")
        .unwrap_or(DEFAULT_MOBILE_LAYER_BUDGET_MB)
        .saturating_mul(BYTES_PER_MIB)
}

/// Returns the mobile memory reserve as a fraction in `[0.0, 1.0]`.
///
/// The percentage comes from `CAKE_MOBILE_RESERVE_PCT` when it is set to a
/// valid unsigned integer, otherwise from `DEFAULT_MOBILE_RESERVE_PCT`.
/// Values above 100 are clamped to 100: reserving more than all of the
/// device memory has no meaning.
fn mobile_reserve_pct() -> f64 {
    let pct = mobile_env_u64("CAKE_MOBILE_RESERVE_PCT")
        .unwrap_or(DEFAULT_MOBILE_RESERVE_PCT)
        .min(100);
    pct as f64 / 100.0
}

/// Compute max layers from a list of GPUs, applying per-device VRAM reserves.
///
/// - **Dedicated VRAM (CUDA)**: reserve max(5%, 768 MiB)
/// - **Unified memory (Apple Silicon)**: reserve max(28%, 6 GiB)
/// - **Apple desktop unified memory**: reserve max(28%, 6 GiB)
/// - **Apple mobile unified memory (iPhone/iPad)**: reserve configurable %
/// (default 80%, override via `CAKE_MOBILE_RESERVE_PCT`), then cap the
/// worker layer budget (default 1.5 GiB, override via
/// `CAKE_MOBILE_LAYER_BUDGET_MB`) to stay under iOS per-process jetsam limits
/// - **CPU / mobile**: reserve 20%
pub fn max_layers_for_gpus(gpus: &[discovery::GpuInfo], layer_size_bytes: u64) -> usize {
if layer_size_bytes == 0 || gpus.is_empty() {
return usize::MAX;
}
let mobile_cap = mobile_layer_budget_bytes();
let mobile_reserve = mobile_reserve_pct();
gpus.iter()
.map(|g| {
let name_lower = g.name.to_lowercase();
let is_cpu = name_lower.starts_with("cpu");
let is_unified = name_lower.contains("apple");
let is_apple_mobile =
name_lower.starts_with("iphone") || name_lower.starts_with("ipad");
let is_apple_desktop = name_lower.contains("apple");
let usable = if is_cpu {
let reserve = (g.vram_bytes as f64 * 0.20) as u64;
g.vram_bytes.saturating_sub(reserve)
} else if is_unified {
} else if is_apple_mobile {
let reserve = (g.vram_bytes as f64 * mobile_reserve) as u64;
g.vram_bytes
.saturating_sub(reserve)
.min(mobile_cap)
} else if is_apple_desktop {
let min_reserve = 6u64 * 1024 * 1024 * 1024;
let pct_reserve = (g.vram_bytes as f64 * 0.28) as u64;
let os_reserve = pct_reserve.max(min_reserve);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ actual object WorkerBridge {
/** Returns the status string produced by the `uniffi.cake_mobile` bindings. */
actual fun getWorkerStatus(): String {
    return uniffi.cake_mobile.getWorkerStatus()
}

/** Forwards the cache directory [path] to the `uniffi.cake_mobile` bindings. */
actual fun setCacheDir(path: String) {
    uniffi.cake_mobile.setCacheDir(path)
}

/**
 * Forwards the mobile memory limits to the `uniffi.cake_mobile` bindings.
 *
 * @param budgetMb layer budget in MiB.
 * @param reservePct percentage of device RAM to hold back.
 */
actual fun configureMobileLimits(budgetMb: UInt, reservePct: UInt) {
    uniffi.cake_mobile.configureMobileLimits(budgetMb, reservePct)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ expect object WorkerBridge {
fun stopWorker()
/** Returns the worker status as a string; each platform `actual` supplies the value. */
fun getWorkerStatus(): String
/** Hands the cache directory [path] to the platform's native bindings. */
fun setCacheDir(path: String)
/**
 * Passes the mobile memory limits to the platform's native bindings.
 *
 * @param budgetMb layer budget in MiB.
 * @param reservePct percentage of device RAM to hold back.
 */
fun configureMobileLimits(budgetMb: UInt, reservePct: UInt)
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class WorkerViewModel(private val settings: PlatformSettings) {
// Mutable state, each field seeded from persisted settings with a fallback default.
var workerName by mutableStateOf(settings.getString("worker_name", "My Phone"))
var modelName by mutableStateOf(settings.getString("model_name", "Qwen/Qwen3.5-0.8B"))
var clusterKey by mutableStateOf(settings.getString("cluster_key", ""))
// Memory limits handed to WorkerBridge.configureMobileLimits. They are read from
// settings once, when the view model is constructed; a stored value that does not
// parse as an unsigned integer falls back to the default (1536 / 80).
private val layerBudgetMb = settings.getString("layer_budget_mb", "1536").toUIntOrNull() ?: 1536u
private val reservePct = settings.getString("reserve_pct", "80").toUIntOrNull() ?: 80u

fun saveSettings() {
settings.setString("worker_name", workerName)
Expand All @@ -52,6 +54,7 @@ class WorkerViewModel(private val settings: PlatformSettings) {
}
}

WorkerBridge.configureMobileLimits(layerBudgetMb, reservePct)
val result = WorkerBridge.startWorker(workerName, modelName, clusterKey)
pollJob.cancel()

Expand Down
5 changes: 5 additions & 0 deletions cake-mobile-app/shared/src/iosMain/cinterop/cake_mobile_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,10 @@ char* cake_get_worker_status(void);
// Set the HuggingFace cache directory (no-op on iOS; used on Android).
void cake_set_cache_dir(const char* path);

// Set iOS jetsam-aware memory limits. Call before cake_start_worker.
//   budget_mb:   maximum layer budget in MiB (default 1536).
//   reserve_pct: percentage of device RAM reserved for the OS (default 80).
// Returns nothing, so there is no result to release with cake_free_string.
void cake_configure_mobile_limits(unsigned int budget_mb, unsigned int reserve_pct);

// Free a string returned by one of the char*-returning functions above.
void cake_free_string(char* s);
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.evilsocket.cake

import cake_mobile.cake_configure_mobile_limits
import cake_mobile.cake_free_string
import cake_mobile.cake_get_worker_status
import cake_mobile.cake_set_cache_dir
Expand Down Expand Up @@ -32,4 +33,8 @@ actual object WorkerBridge {
/** Forwards the cache directory [path] to the native `cake_set_cache_dir` function. */
actual fun setCacheDir(path: String) = cake_set_cache_dir(path)

/**
 * Forwards the mobile memory limits to the native `cake_configure_mobile_limits` function.
 *
 * @param budgetMb layer budget in MiB.
 * @param reservePct percentage of device RAM to hold back.
 */
actual fun configureMobileLimits(budgetMb: UInt, reservePct: UInt) =
    cake_configure_mobile_limits(budgetMb, reservePct)
}
18 changes: 18 additions & 0 deletions cake-mobile/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ pub fn get_worker_status() -> String {
/// Android-only, mutex-guarded string that starts out empty.
// NOTE(review): presumably holds the cache directory supplied through
// `set_cache_dir` — confirm against that function's body.
#[cfg(target_os = "android")]
static ANDROID_CACHE_DIR: Mutex<String> = Mutex::new(String::new());

/// Records the iOS jetsam-aware memory limits; call this before `start_worker`.
///
/// The values are logged and then published as the process environment
/// variables `CAKE_MOBILE_LAYER_BUDGET_MB` and `CAKE_MOBILE_RESERVE_PCT`.
///
/// * `budget_mb` — max layer budget in MiB (default 1536). Set higher for
///   Guided Access / single-app.
/// * `reserve_pct` — percentage of device RAM reserved for OS (default 80).
///   Lower = more layers.
// NOTE(review): `std::env::set_var` is not thread-safe on non-Windows targets;
// presumably this runs before any worker threads exist — confirm.
#[uniffi::export]
pub fn configure_mobile_limits(budget_mb: u32, reserve_pct: u32) {
    let message = format!(
        "[cake-mobile] configure_mobile_limits: budget={}MB reserve={}%",
        budget_mb, reserve_pct
    );
    log_mobile(&message);

    // Same write order as the log line: budget first, then reserve.
    let overrides = [
        ("CAKE_MOBILE_LAYER_BUDGET_MB", budget_mb),
        ("CAKE_MOBILE_RESERVE_PCT", reserve_pct),
    ];
    for (key, value) in &overrides {
        std::env::set_var(key, value.to_string());
    }
}

/// On Android, call this with the app's cacheDir path before start_worker.
/// No-op on iOS (sandbox paths are determined automatically).
#[uniffi::export]
Expand Down Expand Up @@ -504,6 +517,11 @@ pub extern "C" fn cake_get_worker_status() -> *mut c_char {
CString::new(get_worker_status()).unwrap_or_default().into_raw()
}

/// C ABI entry point for [`configure_mobile_limits`]; forwards both arguments unchanged.
///
/// `budget_mb` is the layer budget in MiB and `reserve_pct` the percentage of
/// device RAM to hold back, exactly as for the wrapped function.
#[no_mangle]
pub extern "C" fn cake_configure_mobile_limits(budget_mb: u32, reserve_pct: u32) {
configure_mobile_limits(budget_mb, reserve_pct);
}

/// # Safety
/// `path` must be a valid, non-null, NUL-terminated C string.
#[no_mangle]
Expand Down