-
Notifications
You must be signed in to change notification settings - Fork 1.2k
API to help with the pattern of 'replaces the values of the REE array' #9891
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,25 @@ use crate::{ | |
| types::{Int16Type, Int32Type, Int64Type, RunEndIndexType}, | ||
| }; | ||
|
|
||
| /// Applies `$func` to the values of a RunEndEncoded array and wraps the result with the original run ends. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```ignore | ||
| /// let result = ree_map!(array, Int32Type, my_function)?; | ||
| /// ``` | ||
| /// | ||
| /// Use it for functions that work on the logical values of a REE array while keeping its run-end encoding. | ||
| /// Panics if `$array.as_run_opt::<$run_type>()` is `None`; an error from `$func` exits the enclosing function via `?`. | ||
| #[macro_export] | ||
| macro_rules! ree_map { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about having a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think thats a change thats worth its own PR
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| ($array:expr, $run_type:ty, $func:expr) => {{ | ||
| let ree = $array.as_run_opt::<$run_type>().unwrap(); | ||
| let inner_values = $func(ree.values().as_ref())?; | ||
| Ok(std::sync::Arc::new(ree.with_values(inner_values))) | ||
| }}; | ||
| } | ||
|
|
||
| /// An array of [run-end encoded values]. | ||
| /// | ||
| /// This encoding is a variation on [run-length encoding (RLE)] and is good for representing | ||
|
|
@@ -200,6 +219,46 @@ impl<R: RunEndIndexType> RunArray<R> { | |
| &self.values | ||
| } | ||
|
|
||
| /// Returns a new [`RunArray`] with the same `run_ends` and the supplied `values`. | ||
| /// | ||
| /// # Panics | ||
|
Rich-T-kid marked this conversation as resolved.
|
||
| /// | ||
| /// Panics if `values.len()` does not equal `self.values().len()`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// # use std::sync::Arc; | ||
| /// # use arrow_array::{RunArray, Int32Array, StringArray, ArrayRef,Array}; | ||
| /// # use arrow_array::types::Int32Type; | ||
| /// // A RunArray logically representing ["a", "a", "b", "c", "c"] | ||
| /// let run_ends = Int32Array::from(vec![2, 3, 5]); | ||
| /// let values: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"])); | ||
| /// let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap(); | ||
| /// | ||
| /// // Swap in new values while keeping the same run pattern. | ||
| /// // The result logically represents ["x", "x", "y", "z", "z"]. | ||
| /// let new_values: ArrayRef = Arc::new(StringArray::from(vec!["x", "y", "z"])); | ||
| /// let new_run_array = run_array.with_values(new_values); | ||
| /// | ||
| /// assert_eq!(new_run_array.len(), 5); | ||
| /// assert_eq!(new_run_array.run_ends().values(), &[2, 3, 5]); | ||
| /// ``` | ||
| pub fn with_values(&self, values: ArrayRef) -> Self { | ||
| // The run ends index into `values`, so a replacement must have exactly one entry per run. | ||
| assert_eq!(values.len(), self.values().len()); | ||
| let (run_ends_field, values_field) = match &self.data_type { | ||
| DataType::RunEndEncoded(r, v) => (r, v), | ||
| _ => unreachable!("RunArray should have type RunEndEncoded"), | ||
| }; | ||
| // The replacement values may have a different data type than the current ones (for | ||
| // example when a kernel maps the values to another type). Rebuild the values field | ||
| // from `values` so the declared `RunEndEncoded` type matches the array it wraps, | ||
| // while keeping the existing field name, nullability and metadata. | ||
| let new_values_field = values_field | ||
| .as_ref() | ||
| .clone() | ||
| .with_data_type(values.data_type().clone()); | ||
| let data_type = | ||
| DataType::RunEndEncoded(Arc::clone(run_ends_field), Arc::new(new_values_field)); | ||
| Self { | ||
| data_type, | ||
| run_ends: self.run_ends.clone(), | ||
| values, | ||
| } | ||
| } | ||
|
|
||
| /// Similar to [`values`] but accounts for logical slicing, returning only the values | ||
| /// that are part of the logical slice of this array. | ||
| /// | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is unrelated to adding
with_values, right? It is adding REE support to date_part 🤔There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I noticed it while I was going through spots in the codebase that used this pattern. It was a pretty small change, so I included it in this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general I would prefer separate small PRs (they do actually get reviewed and merged faster !)