Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 10 additions & 19 deletions components/experimental/src/compactdecimal/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
//! Read more about data providers: [`icu_provider`]

use icu_pattern::SinglePlaceholderPattern;
use icu_plurals::provider::PluralElementsPackedULE;
use icu_plurals::provider::{FourBitMetadata, PluralElementsPackedULE};
use icu_provider::prelude::*;
use zerovec::ule::vartuple::VarTupleULE;
use zerovec::ule::{to_sized_varule_bytes, SizedVarULEBytes};
use zerovec::VarZeroVec;

#[cfg(feature = "compiled_data")]
Expand Down Expand Up @@ -71,9 +72,13 @@ pub struct CompactDecimalPatternData<'data> {

impl CompactDecimalPatternData<'_> {
/// The pattern `0`, which is used for low magnitudes and omitted from the data struct.
// Safety: the integrity of the VarULE is enforced in validate_plural_pattern_0_map
pub const PLURAL_PATTERN_0: &'static PluralElementsPackedULE<SinglePlaceholderPattern> =
unsafe { PluralElementsPackedULE::from_bytes_unchecked(&[0, 1]) };
pub const PLURAL_PATTERN_0: SizedVarULEBytes<
2,
PluralElementsPackedULE<SinglePlaceholderPattern>,
> = PluralElementsPackedULE::new_mn(
FourBitMetadata::zero(),
to_sized_varule_bytes!(SinglePlaceholderPattern::PASS_THROUGH),
);

pub(crate) fn patterns_and_exponent_for_magnitude(
&self,
Expand All @@ -84,22 +89,8 @@ impl CompactDecimalPatternData<'_> {
.filter(|t| i16::from(t.sized) <= magnitude)
.last()
.map(|t| (&t.variable, t.sized - t.variable.get_default().0.get()))
.unwrap_or((Self::PLURAL_PATTERN_0, 0))
.unwrap_or((Self::PLURAL_PATTERN_0.as_varule(), 0))
}
}

// Checks that the hand-written bytes behind `PLURAL_PATTERN_0` are exactly what
// encoding a `PluralElements` built from zero metadata and the pass-through
// pattern produces.
#[test]
fn validate_plural_pattern_0_map() {
use icu_plurals::{provider::FourBitMetadata, PluralElements};
use zerovec::ule::encode_varule_to_box;

// Left: the constant under test. Right: the same value produced by the
// regular (allocating) encoder.
assert_eq!(
CompactDecimalPatternData::PLURAL_PATTERN_0,
&*encode_varule_to_box(&PluralElements::new((
FourBitMetadata::try_from_byte(0).unwrap(),
SinglePlaceholderPattern::PASS_THROUGH
)))
);
}

icu_provider::data_struct!(CompactDecimalPatternData<'_>, #[cfg(feature = "datagen")]);
6 changes: 6 additions & 0 deletions components/pattern/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ impl SinglePlaceholderPattern {
/// ```
pub const PASS_THROUGH: &'static SinglePlaceholderPattern =
SinglePlaceholderPattern::from_ref_store_unchecked("\x01");

/// Returns the bytes of this pattern's backing store.
///
/// This is an inherent `const fn` (hidden from the public docs) so that the
/// `to_sized_varule_bytes!` macro can read the bytes in a const context.
#[doc(hidden)] // for macro to_sized_varule_bytes
pub const fn as_bytes(&self) -> &[u8] {
// TODO: Add safety note
// NOTE(review): the macro passes these bytes to an unchecked constructor, so
// presumably they must be identical to the VarULE byte representation of
// the pattern — confirm before relying on it.
self.store.as_bytes()
}
}

/// # Examples
Expand Down
90 changes: 89 additions & 1 deletion components/plurals/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ use yoke::Yokeable;
use zerofrom::ZeroFrom;
use zerovec::ule::vartuple::VarTuple;
use zerovec::ule::vartuple::VarTupleULE;
use zerovec::ule::AsULE;
use zerovec::ule::EncodeAsVarULE;
use zerovec::ule::UleError;
use zerovec::ule::VarULE;
use zerovec::ule::ULE;
use zerovec::ule::{AsULE, SizedVarULEBytes};
use zerovec::VarZeroSlice;

pub mod rules;
Expand Down Expand Up @@ -482,6 +482,94 @@ where
core::mem::transmute(bytes)
}

/// Creates a [`PluralElementsPackedULE`] with an "other" variant in a const context.
///
/// Const parameters:
///
/// - `M`: the length of `input`
/// - `N`: the length of the return value which is `M + 1`
///
/// When [generic_const_exprs] is stabilized, we will be able to add a new
/// function signature without both const parameters.
///
/// # Panics
///
/// Panics if N != M + 1.
///
/// # Examples
///
/// ```
/// use icu::plurals::provider::PluralElementsPackedULE;
/// use icu::plurals::provider::FourBitMetadata;
/// use icu::plurals::PluralRules;
/// use icu::locale::locale;
/// use zerovec::ule::SizedVarULEBytes;
///
/// let value = "hello, world!"; // 13 bytes long
Copy link
Copy Markdown
Member

@Manishearth Manishearth Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue: this isn't a motivating example: this doesn't work in const anyway (since there's try_from_encodeable)

(It's an example, but not a motivating one)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would work in const if I replaced try_from_encodeable with new_unchecked, I just wanted to not have unsafe code in the docs test

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's fine, I was just saying that the example itself wasn't motivating the new zerovec API.

But I have a clearer idea of what's going on now.

/// let metadata = FourBitMetadata::try_from_byte(11).unwrap();
/// let inner_ule = SizedVarULEBytes::<13, str>::try_from_encodeable(value).unwrap();
/// let plural_ule = PluralElementsPackedULE::new_mn::<_, 14>(metadata, inner_ule);
/// let rules = PluralRules::try_new(locale!("en").into(), Default::default()).unwrap();
///
/// assert_eq!(plural_ule.as_varule().get(0.into(), &rules), (metadata, "hello, world!"));
/// assert_eq!(plural_ule.as_varule().get(1.into(), &rules), (metadata, "hello, world!"));
/// assert_eq!(plural_ule.as_varule().get(2.into(), &rules), (metadata, "hello, world!"));
/// ```
///
/// In a const context:
///
/// ```
/// use icu::plurals::provider::PluralElementsPackedULE;
/// use icu::plurals::provider::FourBitMetadata;
/// use icu::plurals::PluralRules;
/// use icu::locale::locale;
/// use zerovec::ule::SizedVarULEBytes;
///
/// const metadata: FourBitMetadata = FourBitMetadata::zero();
/// let plural_ule = const {
/// PluralElementsPackedULE::new_mn::<_, 1>(metadata, SizedVarULEBytes::EMPTY_STR)
/// };
///
/// let rules = PluralRules::try_new(locale!("en").into(), Default::default()).unwrap();
///
/// assert_eq!(plural_ule.as_varule().get(0.into(), &rules), (metadata, ""));
/// assert_eq!(plural_ule.as_varule().get(1.into(), &rules), (metadata, ""));
/// assert_eq!(plural_ule.as_varule().get(2.into(), &rules), (metadata, ""));
/// ```
///
/// [generic_const_exprs]: https://doc.rust-lang.org/beta/unstable-book/language-features/generic-const-exprs.html#generic_const_exprs
pub const fn new_mn<const M: usize, const N: usize>(
metadata: FourBitMetadata,
input: SizedVarULEBytes<M, V>,
) -> SizedVarULEBytes<N, PluralElementsPackedULE<V>> {
#[allow(clippy::panic)] // for safety, and documented
if N != M + 1 {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue: this should be a const assertion, not a runtime panic

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please suggest code that compiles with a const assertion.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you can't do it then we shouldn't have this yet

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm? We have lots of const code with runtime assertions because there isn't a good way to write const assertions.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even the Rust standard library does runtime panics on const parameters. For example:

https://doc.rust-lang.org/std/primitive.slice.html#method.as_chunks

slice::as_chunks panics if the const parameter N is zero. There was a lengthy discussion on the proposal thread, starting here: rust-lang/rust#74985 (comment)

panic!(concat!(
"new_mn: N (",
stringify!(N),
") != 1 + M (",
stringify!(M),
")"
));
}
let mut bytes = [0u8; N];
#[allow(clippy::unwrap_used)] // the bytes are nonempty because N > 0
let (start, remainder) = bytes.split_first_mut().unwrap();
// TODO(1.87): use copy_from_slice
let mut i = 0;
#[allow(clippy::indexing_slicing)] // both remainder and input are length M
while i < M {
remainder[i] = input.as_bytes()[i];
i += 1;
}
// First byte = 0...mmmm for a singleton
*start = metadata.get();
// Safety: bytes are a valid representation of this type:
// 1. The first bit is 0 which indicates a singleton
// 2. The remainder is a valid V by invariant of the input parameter
unsafe { SizedVarULEBytes::new_unchecked(bytes) }
}

/// Returns a tuple with:
/// 1. The lead byte
/// 2. Bytes corresponding to the default V
Expand Down
181 changes: 181 additions & 0 deletions utils/zerovec/src/ule/fixed_length.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::{EncodeAsVarULE, UleError, VarULE, ULE};
use core::fmt;
use core::marker::PhantomData;
use core::ops::Deref;

/// A container for a [`VarULE`] with a fixed byte length.
///
/// This container may be useful if the length of your VarULE is known at compile-time.
///
/// To construct one of these in a const context, consider [`to_sized_varule_bytes!`].
///
/// # Examples
///
/// ```
/// use zerovec::ule::SizedVarULEBytes;
/// use zerovec::ule::to_sized_varule_bytes;
///
/// let from_constructor = SizedVarULEBytes::<13, str>::from_varule("hello, world!").unwrap();
/// let from_macro = to_sized_varule_bytes!("hello, world!");
///
/// assert_eq!(&*from_constructor, "hello, world!");
/// assert_eq!(&*from_macro, "hello, world!");
/// ```
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct SizedVarULEBytes<const N: usize, V: VarULE + ?Sized> {
/// Invariant: The bytes MUST be a valid VarULE representation of `V`.
bytes: [u8; N],
_marker: PhantomData<V>,
}

impl<const N: usize, V: VarULE + ?Sized> SizedVarULEBytes<N, V> {
/// Creates one of these from an [`EncodeAsVarULE`].
///
/// Returns an error if the byte length in the container is not the correct length
/// for the encodeable object.
///
/// # Examples
///
/// ```
/// use zerovec::ule::SizedVarULEBytes;
///
/// let container = SizedVarULEBytes::<13, str>::try_from_encodeable("hello, world!").unwrap();
///
/// assert_eq!(&*container, "hello, world!");
///
/// // Returns an error if the container is not the correct size:
/// SizedVarULEBytes::<20, str>::try_from_encodeable("hello, world!").unwrap_err();
/// ```
pub fn try_from_encodeable(input: impl EncodeAsVarULE<V>) -> Result<Self, UleError> {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub fn try_from_encodeable(input: impl EncodeAsVarULE<V>) -> Result<Self, UleError> {
pub fn try_from_encodeable(input: &impl EncodeAsVarULE<V>) -> Result<Self, UleError> {

?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure of the recommended style here. It doesn't matter if the trait is implemented on a reference. @Manishearth ?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a style choice. You only test this with a &str input, but say you have a String: you don't want to accidentally pass that by value.

let len = input.encode_var_ule_len();
if len != N {
return Err(UleError::length::<V>(len));
}
let mut bytes = [0u8; N];
input.encode_var_ule_write(&mut bytes);
// Safety: the bytes were just written from an EncodeAsVarULE impl
unsafe { Ok(Self::new_unchecked(bytes)) }
}

/// Creates one of these by copying the bytes of an existing [`VarULE`].
///
/// Returns an error if the byte length of `input` is not exactly `N`, the
/// byte length of the container.
pub fn from_varule(input: &V) -> Result<Self, UleError> {
    let encoded = input.as_bytes();
    // The slice-to-array conversion succeeds only when the lengths match,
    // in which case it yields a byte-for-byte copy.
    match <[u8; N] as core::convert::TryFrom<&[u8]>>::try_from(encoded) {
        // Safety: the bytes were just copied from V
        Ok(bytes) => Ok(unsafe { Self::new_unchecked(bytes) }),
        Err(_) => Err(UleError::length::<V>(encoded.len())),
    }
}

/// Wraps raw bytes in a container without validating them.
///
/// # Safety
///
/// The bytes MUST be a valid VarULE representation of `V`.
pub const unsafe fn new_unchecked(bytes: [u8; N]) -> Self {
    // The marker only ties the container to `V`; it holds no data.
    let _marker = PhantomData;
    Self { bytes, _marker }
}

#[doc(hidden)] // macro constructor
pub const unsafe fn new_unchecked_with_type_hint(bytes: [u8; N], _hint: &V) -> Self {
Self::new_unchecked(bytes)
}

/// Returns the bytes backing this [`SizedVarULEBytes`], which are
/// guaranteed to be a valid VarULE representation of `V`.
pub const fn as_bytes(&self) -> &[u8; N] {
&self.bytes
}

/// Reinterprets the stored bytes as a reference to `V`.
///
/// In debug builds the bytes are additionally re-validated.
pub fn as_varule(&self) -> &V {
    let raw: &[u8] = &self.bytes;
    debug_assert!(V::validate_bytes(raw).is_ok());
    // Safety: self.bytes are a valid VarULE representation of `V`
    // (invariant of the `bytes` field).
    unsafe { V::from_bytes_unchecked(raw) }
}
}

/// Formats the container exactly like the `V` it holds.
impl<const N: usize, V: VarULE + ?Sized> fmt::Debug for SizedVarULEBytes<N, V>
where
    V: fmt::Debug,
{
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner: &V = self.as_varule();
        fmt::Debug::fmt(inner, formatter)
    }
}

/// Borrowing as `V` goes through [`SizedVarULEBytes::as_varule`].
impl<const N: usize, V: VarULE + ?Sized> AsRef<V> for SizedVarULEBytes<N, V> {
    fn as_ref(&self) -> &V {
        Self::as_varule(self)
    }
}

/// Dereferencing yields the contained `V`, via [`SizedVarULEBytes::as_varule`].
impl<const N: usize, V: VarULE + ?Sized> Deref for SizedVarULEBytes<N, V> {
    type Target = V;

    fn deref(&self) -> &V {
        Self::as_varule(self)
    }
}

impl SizedVarULEBytes<0, str> {
/// The empty string as a [`SizedVarULEBytes`].
// Safety: the empty byte array is valid UTF-8, so it is a valid (empty) str
pub const EMPTY_STR: Self = unsafe { Self::new_unchecked([]) };
}

impl<T: ULE> SizedVarULEBytes<0, [T]> {
/// The empty slice as a [`SizedVarULEBytes`].
// Safety: the empty byte array is a valid `[T]` (a slice with zero elements)
pub const EMPTY_SLICE: Self = unsafe { Self::new_unchecked([]) };
}

/// Takes a const expression resolving to a [`VarULE`] and returns one
/// resolving to an appropriately sized [`SizedVarULEBytes`].
///
/// The expression must have an `as_bytes` method that is callable in a const
/// context (for example `str::as_bytes`).
///
/// The expression is inserted twice into code, once for evaluation and once
/// for the type hint only. If this is a problem, save the expression into a
/// const variable first.
///
/// The expansion refers to `SizedVarULEBytes` through `$crate`, so callers
/// only need to import the macro itself.
///
/// # Examples
///
/// ```
/// use zerovec::ule::SizedVarULEBytes;
/// use zerovec::ule::to_sized_varule_bytes;
///
/// let stack_str = const { to_sized_varule_bytes!("hello, world!") };
/// assert_eq!(&*stack_str, "hello, world!");
/// ```
#[macro_export]
#[doc(hidden)] // macro
macro_rules! __to_sized_varule_bytes {
    ($expr:expr) => {{
        const SRC: &[u8] = { $expr }.as_bytes();
        const N: usize = SRC.len();
        let mut bytes: [u8; N] = [0; N];
        // TODO(1.87): use copy_from_slice
        let mut i = 0;
        #[allow(clippy::indexing_slicing)] // both bytes and SRC are length N
        while i < N {
            bytes[i] = SRC[i];
            i += 1;
        }
        // Safety: `bytes` is a valid representation of input by the VarULE
        // trait bound on SizedVarULEBytes below
        //
        // `$crate::` path: names in a `macro_rules!` expansion are resolved at
        // the call site, so an unqualified `SizedVarULEBytes` would force every
        // caller to import the type.
        unsafe { $crate::ule::SizedVarULEBytes::new_unchecked_with_type_hint(bytes, { $expr }) }
    }};
}
#[doc(inline)]
pub use __to_sized_varule_bytes as to_sized_varule_bytes;
4 changes: 4 additions & 0 deletions utils/zerovec/src/ule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod chars;
#[cfg(doc)]
pub mod custom;
mod encode;
mod fixed_length;
mod macros;
mod multi;
mod niche;
Expand All @@ -30,6 +31,7 @@ pub use chars::CharULE;
#[cfg(feature = "alloc")]
pub use encode::encode_varule_to_box;
pub use encode::EncodeAsVarULE;
pub use fixed_length::{to_sized_varule_bytes, SizedVarULEBytes};
pub use multi::MultiFieldsULE;
pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
pub use option::{OptionULE, OptionVarULE};
Expand Down Expand Up @@ -441,6 +443,8 @@ impl UleError {
}

/// Construct an "invalid length" error for the given type and length
///
/// The length is of the input bytes, not the expected length.
pub fn length<T: ?Sized + 'static>(len: usize) -> UleError {
UleError::InvalidLength {
ty: any::type_name::<T>(),
Expand Down