-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix: avoid panic in date_bin compute_distance near i64::MIN #22408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -328,20 +328,35 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> Resul | |
| })?; | ||
|
|
||
| // distance from origin to bin | ||
| let time_delta = compute_distance(time_diff, stride_nanos); | ||
| let time_delta = compute_distance(time_diff, stride_nanos)?; | ||
|
|
||
| Ok(origin + time_delta) | ||
| origin.checked_add(time_delta).ok_or_else(|| { | ||
| arrow::error::ArrowError::InvalidArgumentError(format!( | ||
| "date_bin origin {origin} + delta {time_delta} overflows i64" | ||
| )) | ||
| .into() | ||
| }) | ||
| } | ||
|
|
||
| // distance from origin to bin | ||
| fn compute_distance(time_diff: i64, stride: i64) -> i64 { | ||
| let time_delta = time_diff - (time_diff % stride); | ||
| fn compute_distance(time_diff: i64, stride: i64) -> Result<i64> { | ||
| let remainder = time_diff % stride; | ||
| let time_delta = time_diff.checked_sub(remainder).ok_or_else(|| { | ||
| arrow::error::ArrowError::InvalidArgumentError(format!( | ||
| "date_bin compute_distance time_diff {time_diff} - remainder {remainder} overflows i64" | ||
| )) | ||
| })?; | ||
|
|
||
| if time_diff < 0 && stride > 1 && time_delta != time_diff { | ||
| // The origin is later than the source timestamp, round down to the previous bin | ||
| time_delta - stride | ||
| time_delta.checked_sub(stride).ok_or_else(|| { | ||
| arrow::error::ArrowError::InvalidArgumentError(format!( | ||
| "date_bin compute_distance time_delta {time_delta} - stride {stride} overflows i64" | ||
| )) | ||
| .into() | ||
| }) | ||
| } else { | ||
| time_delta | ||
| Ok(time_delta) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -357,7 +372,7 @@ fn date_bin_months_interval(stride_months: i64, source: i64, origin: i64) -> Res | |
| - origin_date.month() as i32; | ||
|
|
||
| // distance from origin to bin | ||
| let month_delta = compute_distance(month_diff as i64, stride_months); | ||
| let month_delta = compute_distance(month_diff as i64, stride_months)?; | ||
|
|
||
| let mut bin_time = if month_delta < 0 { | ||
| match origin_date | ||
|
|
@@ -1341,4 +1356,39 @@ mod tests { | |
| assert!(val.is_none(), "Expected None for out of range operation"); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_date_bin_compute_distance_i64_min() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The unit test coverage here looks good for the UDF invocation path. Since this is SQL-visible behavior and there are already That would help verify the problematic scalar input returns |
||
| // Regression for #22215: date_bin_nanos_interval on a source near i64::MIN | ||
| // previously panicked inside compute_distance with "attempt to subtract with overflow". | ||
| // Now it must return a normal Err that the scalar pipeline maps to NULL. | ||
| let result = date_bin_nanos_interval(3, i64::MIN, 0); | ||
| assert!( | ||
| result.is_err(), | ||
| "expected Err for source=i64::MIN, got {result:?}" | ||
| ); | ||
|
|
||
| let return_field = &Arc::new(Field::new( | ||
| "f", | ||
| DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
| true, | ||
| )); | ||
| let args = vec![ | ||
| ColumnarValue::Scalar(ScalarValue::new_interval_mdn(0, 0, 3)), | ||
| ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(i64::MIN), None)), | ||
| ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(0), None)), | ||
| ]; | ||
| let result = invoke_date_bin_with_args(args, 1, return_field); | ||
| assert!(result.is_ok(), "expected Ok with NULL, got {result:?}"); | ||
| if let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(val, _)) = | ||
| result.unwrap() | ||
| { | ||
| assert!( | ||
| val.is_none(), | ||
| "Expected None for compute_distance overflow, got {val:?}" | ||
| ); | ||
| } else { | ||
| panic!("Expected TimestampNanosecond scalar"); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch making this path fallible overall. I think there’s still one unchecked overflow case left here though.
time_diff % stridecan still panic whentime_diff == i64::MINandstride == -1. Right nowdate_bin_implonly rejectsstride == 0, so a negative interval can still reach this code path and panic instead of returning the normal error/NULL behavior this fix is aiming for.Could we switch this to
checked_remhere, or alternatively reject non-positive strides before callingcompute_distance?It would also be great to add a regression test covering the
i64::MIN, -1case.