Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
53cbaee
Add redesign handling of container failing on start
Synicix Apr 10, 2025
fa407bb
Add fail during execution
Synicix Apr 11, 2025
8540eed
Add color printing
Synicix Apr 11, 2025
103cb5c
Fix clippy error
Synicix Apr 11, 2025
b823f18
Split error message
Synicix Apr 11, 2025
d0e24b1
Add new error handling
Synicix Apr 14, 2025
6c090fa
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Apr 15, 2025
3665aa3
Fix issue with queued container
Synicix Apr 17, 2025
335d1fe
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Apr 17, 2025
9ddb8c6
Added nested directory test
Synicix Apr 17, 2025
40a6b74
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Apr 22, 2025
3968e97
Remove old comments
Synicix Apr 22, 2025
d10a387
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Apr 24, 2025
60e8025
Fix all issues relating to merge
Synicix Apr 24, 2025
370eaaf
Remove stale print statement
Synicix Apr 24, 2025
0eb1ccb
Fixed missing image pull for test
Synicix May 9, 2025
02ae4ca
Fix clippy
Synicix May 9, 2025
ab2a2da
Add clippy version exception
Synicix May 9, 2025
17698f4
Remove stale comment
Synicix May 9, 2025
5026acf
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix May 31, 2025
e4f2749
Update orchestrator test to fix bugs
Synicix Jun 1, 2025
521e9f0
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Jun 8, 2025
9aa5b6e
Merge branch 'dev' into failure_modes
Synicix Jun 18, 2025
b5cf015
Fix merge errors
Synicix Jun 18, 2025
ccefa31
Merge branch 'dev' into failure_modes
Synicix Jun 20, 2025
7c1fbc8
Fix bug in test
Synicix Jun 20, 2025
fef3b57
Fix format missing order
Synicix Jun 20, 2025
380821e
Remove logging
Synicix Jun 24, 2025
5a45200
Add delay to deal with docker not being fast enough
Synicix Jun 24, 2025
33f54e1
Merge branch 'dev' into failure_modes
Synicix Jun 25, 2025
2387cdd
Merge remote-tracking branch 'upstream/dev' into failure_modes
Synicix Jun 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ print_stdout = { level = "allow", priority = 127 } # stdout
pub_use = { level = "allow", priority = 127 } # ok to structure source into many files but clean up import
pub_with_shorthand = { level = "allow", priority = 127 } # allow use of pub(super)
pub_without_shorthand = { level = "allow", priority = 127 } # allow use of pub(in super)
result_large_err = { level = "allow", priority = 127 } # allow large error types in Result
question_mark_used = { level = "allow", priority = 127 } # allow question operator
self_named_module_files = { level = "allow", priority = 127 } # mod files ok
semicolon_inside_block = { level = "allow", priority = 127 } # ok to keep inside block
Expand Down
7 changes: 5 additions & 2 deletions src/core/crypto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub fn hash_buffer(buffer: impl AsRef<[u8]>) -> String {
/// Will return error if unable to access file.
pub fn hash_file(filepath: impl AsRef<Path>) -> Result<String> {
hash_stream(
&mut File::open(&filepath).context(selector::InvalidFilepath {
&mut File::open(&filepath).context(selector::InvalidFileOrDirPath {
path: filepath.as_ref(),
})?,
)
Expand All @@ -62,7 +62,10 @@ pub fn hash_file(filepath: impl AsRef<Path>) -> Result<String> {
pub fn hash_dir(dirpath: impl AsRef<Path>) -> Result<String> {
let summary: BTreeMap<String, String> = dirpath
.as_ref()
.read_dir()?
.read_dir()
.context(selector::InvalidFileOrDirPath {
path: dirpath.as_ref(),
})?
.map(|path| {
let access_path = path?.path();
Ok((
Expand Down
4 changes: 3 additions & 1 deletion src/core/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match &self.kind {
Kind::EmptyResponseWhenLoadingContainerAltImage { backtrace, .. }
| Kind::FailedToExtractRunInfo { backtrace, .. }

Check warning on line 93 in src/core/error.rs

View check run for this annotation

Codecov / codecov/patch

src/core/error.rs#L93

Added line #L93 was not covered by tests
| Kind::FailedToStartPod { backtrace, .. }
| Kind::GeneratedNamesOverflow { backtrace, .. }
| Kind::InvalidFilepath { backtrace, .. }
| Kind::InvalidFileOrDirPath { backtrace, .. }
| Kind::InvalidPodResultTerminatedDatetime { backtrace, .. }
| Kind::KeyMissing { backtrace, .. }
| Kind::NoAnnotationFound { backtrace, .. }
Expand Down
238 changes: 165 additions & 73 deletions src/core/orchestrator/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
},
};
use bollard::{
container::{Config, CreateContainerOptions, ListContainersOptions},
container::{Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions},
models::{ContainerStateStatusEnum, HostConfig},
secret::{ContainerInspectResponse, ContainerSummary},
};
use chrono::DateTime;
use futures_util::future::join_all;
Expand All @@ -34,6 +35,11 @@
.expect("Invalid image tag regex.")
});

/// Tokenizer for pod command strings.
///
/// Each match is one of: a run of characters containing neither whitespace nor quote
/// characters, a double-quoted span, or a single-quoted span. A quoted argument that
/// contains spaces is therefore captured as a single token instead of being split apart.
#[expect(clippy::expect_used, reason = "Valid static regex")]
static RE_FOR_CMD: LazyLock<Regex> = LazyLock::new(|| {
    // The panic message names this regex (the previous text was copied from another static).
    Regex::new(r#"[^\s"']+|"[^"]*"|'[^']*'"#).expect("Invalid command regex.")
});

impl LocalDockerOrchestrator {
fn prepare_mount_binds(
namespace_lookup: &HashMap<String, PathBuf>,
Expand Down Expand Up @@ -104,7 +110,11 @@
- Pod commands will always have at least 1 element
"#
)]
pub(crate) fn prepare_container_start_inputs(
/// Prepare the inputs for starting a container.
///
/// # Errors
/// Will fail if pod job is invalid
pub fn prepare_container_start_inputs(
namespace_lookup: &HashMap<String, PathBuf>,
pod_job: &PodJob,
image: String,
Expand All @@ -130,11 +140,15 @@
),
("org.orcapod.pod_job.hash".to_owned(), pod_job.hash.clone()),
]);
let command = pod_job
.pod
.command
.split_whitespace()
.map(String::from)
let command = RE_FOR_CMD
.captures_iter(&pod_job.pod.command)
.map(|capture| {
capture
.extract::<0>()
.0
.to_owned()
.replace(['\'', '\"'], "")
})
.collect::<Vec<_>>();

Ok((
Expand Down Expand Up @@ -165,10 +179,7 @@
))
}
#[expect(
clippy::cast_sign_loss,
clippy::string_slice,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::indexing_slicing,
reason = r#"
- Timestamp and memory should always have a value > 0
Expand All @@ -181,7 +192,7 @@
pub(crate) async fn list_containers(
&self,
filters: HashMap<String, Vec<String>>, // https://docs.rs/bollard/latest/bollard/container/struct.ListContainersOptions.html#structfield.filters
) -> Result<impl Iterator<Item = (String, RunInfo)>> {
) -> Result<impl Iterator<Item = Result<(String, RunInfo)>>> {
Ok(join_all(
self.api
.list_containers(Some(ListContainersOptions {
Expand All @@ -205,70 +216,151 @@
)
.await
.into_iter()
.filter_map(|result: Result<_>| {
let (container_name, container_summary, container_spec) = result.ok()?;
let terminated_timestamp =
DateTime::parse_from_rfc3339(container_spec.state.as_ref()?.finished_at.as_ref()?)
.ok()?
.timestamp() as u64;
Some((
.map(|result: Result<_>| {
let (container_name, container_summary, container_inspect_response) = match result {
Ok((container_name, container_summary, container_inspect_response)) => (
container_name,
container_summary,
container_inspect_response,
),
Err(error) => {
return Err(error);

Check warning on line 227 in src/core/orchestrator/docker.rs

View check run for this annotation

Codecov / codecov/patch

src/core/orchestrator/docker.rs#L226-L227

Added lines #L226 - L227 were not covered by tests
}
};

Ok(
Self::extract_run_info(&container_summary, &container_inspect_response)
.map(|run_info| (container_name.clone(), run_info))
.context(selector::FailedToExtractRunInfo { container_name })?,
)
}))
}
pub(crate) async fn delete_container(&self, container_name: &str) -> Result<()> {
self.api
.remove_container(
container_name,
RunInfo {
image: container_spec.config.as_ref()?.image.as_ref()?.clone(),
created: container_summary.created? as u64,
terminated: (terminated_timestamp > 0).then_some(terminated_timestamp),
env_vars: container_spec
.config
.as_ref()?
.env
Some(RemoveContainerOptions {
force: true,
..Default::default()
}),
)
.await?;
Ok(())
}

#[expect(
clippy::cast_sign_loss,
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
reason = r#"
- Timestamp and memory should always have a value > 0
- Container will always have a name with more than 1 character
- No issue in core casting if between 0 - 3.40e38(f32:MAX)
- No issue in exit code casting if between -3.27e4(i16:MIN) - 3.27e4(i16:MAX)
- Containers will always have at least 1 name with at least 2 characters
- This functions requires a lot of boilerplate code to extract the run info
"#
)]
fn extract_run_info(
container_summary: &ContainerSummary,
container_inspect_response: &ContainerInspectResponse,
) -> Option<RunInfo> {
let terminated_timestamp = DateTime::parse_from_rfc3339(
container_inspect_response
.state
.as_ref()?
.finished_at
.as_ref()?,
)
.ok()?
.timestamp() as u64;
Some(RunInfo {
image: container_inspect_response
.config
.as_ref()?
.image
.as_ref()?
.clone(),
created: container_summary.created? as u64,
terminated: (terminated_timestamp > 0).then_some(terminated_timestamp),
env_vars: container_inspect_response
.config
.as_ref()?
.env
.as_ref()?
.iter()
.filter_map(|x| {
x.split_once('=')
.map(|(key, value)| (key.to_owned(), value.to_owned()))
})
.collect(),
command: format!(
"{} {}",
container_inspect_response
.config
.as_ref()?
.entrypoint
.as_ref()?
.join(" "),
container_inspect_response
.config
.as_ref()?
.cmd
.as_ref()?
.join(" ")
),
status: match (
container_inspect_response.state.as_ref()?.status?,
container_inspect_response.state.as_ref()?.exit_code? as i16,
) {
(ContainerStateStatusEnum::RUNNING, _) => Status::Running,
(ContainerStateStatusEnum::EXITED, 0) => Status::Completed,
(ContainerStateStatusEnum::EXITED | ContainerStateStatusEnum::DEAD, code) => {
Status::Failed(code)
}
(
ContainerStateStatusEnum::CREATED | ContainerStateStatusEnum::RESTARTING,
code,
) => {
if container_inspect_response
.state
.as_ref()?
.iter()
.filter_map(|x| {
x.split_once('=')
.map(|(key, value)| (key.to_owned(), value.to_owned()))
})
.collect(),
command: format!(
"{} {}",
container_spec
.config
.as_ref()?
.entrypoint
.as_ref()?
.join(" "),
container_spec.config.as_ref()?.cmd.as_ref()?.join(" ")
),
status: match (
container_spec.state.as_ref()?.status.as_ref()?,
container_spec.state.as_ref()?.exit_code? as i16,
) {
(ContainerStateStatusEnum::RUNNING, _) => Status::Running,
(ContainerStateStatusEnum::EXITED, 0) => Status::Completed,
(ContainerStateStatusEnum::EXITED, code) => Status::Failed(code),
_ => todo!(),
},
mounts: container_spec
.mounts
.error
.as_ref()?
.iter()
.map(|mount_point| {
Some(format!(
"{}:{}{}",
mount_point.source.as_ref()?,
mount_point.destination.as_ref()?,
mount_point
.mode
.as_ref()
.map_or_else(String::new, |mode| format!(":{mode}"))
))
})
.collect::<Option<Vec<_>>>()?,
labels: container_spec.config.as_ref()?.labels.as_ref()?.clone(),
cpu_limit: container_spec.host_config.as_ref()?.nano_cpus? as f32
/ 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9
memory_limit: container_spec.host_config.as_ref()?.memory? as u64,
},
))
}))
.is_empty()
{
Status::Queued
} else {
Status::Failed(code)
}
}
_ => Status::Unknown,

Check warning on line 337 in src/core/orchestrator/docker.rs

View check run for this annotation

Codecov / codecov/patch

src/core/orchestrator/docker.rs#L337

Added line #L337 was not covered by tests
},
mounts: container_inspect_response
.mounts
.as_ref()?
.iter()
.map(|mount_point| {
Some(format!(
"{}:{}{}",
mount_point.source.as_ref()?,
mount_point.destination.as_ref()?,
mount_point
.mode
.as_ref()
.map_or_else(String::new, |mode| format!(":{mode}"))
))
})
.collect::<Option<Vec<_>>>()?,
labels: container_inspect_response
.config
.as_ref()?
.labels
.as_ref()?
.clone(),
cpu_limit: container_inspect_response.host_config.as_ref()?.nano_cpus? as f32
/ 10_f32.powi(9), // ncpu, ucores=3, mcores=6, cores=9
memory_limit: container_inspect_response.host_config.as_ref()?.memory? as u64,
})
}
}
21 changes: 20 additions & 1 deletion src/uniffi/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,25 @@ pub(crate) enum Kind {
path: PathBuf,
backtrace: Option<Backtrace>,
},
#[snafu(display(
"Failed to extract run info from the container image file: {container_name}."
))]
FailedToExtractRunInfo {
container_name: String,
backtrace: Option<Backtrace>,
},
#[snafu(display(
"Fail to start pod with container_name: {container_name} with error: {source}"
))]
FailedToStartPod {
container_name: String,
source: BollardError,
backtrace: Option<Backtrace>,
},
#[snafu(display("Out of generated random names."))]
GeneratedNamesOverflow { backtrace: Option<Backtrace> },
#[snafu(display("{source} ({path:?})."))]
InvalidFilepath {
InvalidFileOrDirPath {
path: PathBuf,
source: io::Error,
backtrace: Option<Backtrace>,
Expand Down Expand Up @@ -122,4 +137,8 @@ impl OrcaError {
pub const fn is_purged_pod_run(&self) -> bool {
matches!(self.kind, Kind::NoMatchingPodRun { .. })
}
/// Returns `true` if this error is the `FailedToStartPod` kind, i.e. a pod failed to start.
pub const fn is_failed_to_start_pod(&self) -> bool {
matches!(self.kind, Kind::FailedToStartPod { .. })
}
}
Loading