-
Notifications
You must be signed in to change notification settings - Fork 5
feat: subchunk write order #160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
5ef4b1b
746f39c
2a59fc5
28cce28
037db6e
1338e49
46506a5
0ff7867
4dad7bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,10 +18,12 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; | |
| use rayon_iter_concurrent_limit::iter_concurrent_limit; | ||
| use unsafe_cell_slice::UnsafeCellSlice; | ||
| use utils::is_whole_chunk; | ||
| use zarrs::array::codec::{ShardingCodecOptions, SubchunkWriteOrder}; | ||
| use zarrs::array::{ | ||
| ArrayBytes, ArrayBytesDecodeIntoTarget, ArrayBytesFixedDisjointView, ArrayMetadata, | ||
| ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecChain, CodecOptions, DataType, | ||
| FillValue, StoragePartialDecoder, copy_fill_value_into, update_array_bytes, | ||
| ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecChain, CodecOptions, | ||
| CodecSpecificOptions, DataType, FillValue, StoragePartialDecoder, copy_fill_value_into, | ||
| update_array_bytes, | ||
| }; | ||
| use zarrs::config::global_config; | ||
| use zarrs::convert::array_metadata_v2_to_v3; | ||
|
|
@@ -38,7 +40,7 @@ mod utils; | |
|
|
||
| use crate::concurrency::ChunkConcurrentLimitAndCodecOptions; | ||
| use crate::store::StoreConfig; | ||
| use crate::utils::{PyCodecErrExt, PyErrExt as _}; | ||
| use crate::utils::{PyCodecErrExt, PyErrExt as _, SubchunkWriteOrderWrapper}; | ||
|
|
||
| // TODO: Use a OnceLock for store with get_or_try_init when stabilised? | ||
| #[gen_stub_pyclass] | ||
|
|
@@ -218,6 +220,7 @@ impl CodecPipelineImpl { | |
| chunk_concurrent_maximum=None, | ||
| num_threads=None, | ||
| direct_io=false, | ||
| subchunk_write_order=SubchunkWriteOrderWrapper(SubchunkWriteOrder::Random), | ||
|
Comment on lines
215
to
+224
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I basically just got this working with the compiler but is it used since we have
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Neat, yeah that’s new! We could try migrating to it in a different PR. |
||
| ))] | ||
| #[new] | ||
| fn new( | ||
|
|
@@ -228,6 +231,7 @@ impl CodecPipelineImpl { | |
| chunk_concurrent_maximum: Option<usize>, | ||
| num_threads: Option<usize>, | ||
| direct_io: bool, | ||
| subchunk_write_order: SubchunkWriteOrderWrapper, | ||
| ) -> PyResult<Self> { | ||
| store_config.direct_io(direct_io); | ||
| let metadata = serde_json::from_str(array_metadata).map_py_err::<PyTypeError>()?; | ||
|
|
@@ -237,8 +241,16 @@ impl CodecPipelineImpl { | |
| } | ||
| ArrayMetadata::V3(v3) => Cow::Borrowed(v3), | ||
| }; | ||
| let codec_chain = | ||
| Arc::new(CodecChain::from_metadata(&metadata_v3.codecs).map_py_err::<PyTypeError>()?); | ||
| let codec_chain = Arc::new( | ||
| CodecChain::from_metadata(&metadata_v3.codecs) | ||
| .map_py_err::<PyTypeError>()? | ||
| .with_codec_specific_options( | ||
| &CodecSpecificOptions::default().with_option( | ||
| ShardingCodecOptions::default() | ||
| .with_subchunk_write_order(subchunk_write_order.0), | ||
| ), | ||
| ), | ||
| ); | ||
| let codec_options = CodecOptions::default().with_validate_checksums(validate_checksums); | ||
|
|
||
| let chunk_concurrent_minimum = | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,10 @@ | ||
| use std::fmt::Display; | ||
|
|
||
| use pyo3::{PyErr, PyResult, PyTypeInfo}; | ||
| use zarrs::array::CodecError; | ||
| use pyo3::{ | ||
| Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, PyResult, PyTypeInfo, Python, | ||
| exceptions::PyValueError, types::PyString, | ||
| }; | ||
| use zarrs::array::{CodecError, codec::SubchunkWriteOrder}; | ||
|
|
||
| use crate::ChunkItem; | ||
|
|
||
|
|
@@ -41,3 +44,42 @@ pub fn is_whole_chunk(item: &ChunkItem) -> bool { | |
| item.chunk_subset.start().iter().all(|&o| o == 0) | ||
| && item.chunk_subset.shape() == bytemuck::must_cast_slice::<_, u64>(&item.shape) | ||
| } | ||
|
|
||
| #[derive(Debug, Clone)] | ||
| pub struct SubchunkWriteOrderWrapper(pub SubchunkWriteOrder); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is required because of the orphan rule, right?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
So went my thinking, although I think
Is that different than what we have here?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, it’d be a trait instead of a type, which would let us use the unwrapped type. But as said, probably doesn’t work. |
||
|
|
||
| impl<'py> IntoPyObject<'py> for SubchunkWriteOrderWrapper { | ||
| type Target = PyString; | ||
| type Output = Bound<'py, PyString>; | ||
| type Error = PyErr; | ||
|
|
||
| fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> { | ||
| match self.0 { | ||
| SubchunkWriteOrder::C => Ok("C".into_pyobject(py)?), | ||
| SubchunkWriteOrder::Random => Ok("random".into_pyobject(py)?), | ||
| _ => Err(PyValueError::new_err( | ||
| "Unrecognized subchunk write order for converting to python object, only `C` and `random` allowed.", | ||
| )), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl<'py> FromPyObject<'_, 'py> for SubchunkWriteOrderWrapper { | ||
| type Error = PyErr; | ||
|
|
||
| fn extract(option: Borrowed<'_, 'py, PyAny>) -> PyResult<SubchunkWriteOrderWrapper> { | ||
| match option.extract::<&str>()? { | ||
| "C" => Ok(SubchunkWriteOrderWrapper(SubchunkWriteOrder::C)), | ||
| "random" => Ok(SubchunkWriteOrderWrapper(SubchunkWriteOrder::Random)), | ||
| _ => Err(PyValueError::new_err( | ||
| "Unrecognized subchunk write order while extracting to rust, only `C` and `random` allowed.", | ||
| )), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl pyo3_stub_gen::PyStubType for SubchunkWriteOrderWrapper { | ||
| fn type_output() -> pyo3_stub_gen::TypeInfo { | ||
| pyo3_stub_gen::TypeInfo::builtin("str") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps a bit cleaner for type hints: what about
```python
class SubchunkWriteOrder(StrEnum):
    C = "C"
    random = "random"
```
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
So users couldn’t do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit but i find
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I didn't even try to check if the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Wow I didn't realize |
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add a bit of reasoning about why one would be chosen over the other.