From 5482960ca7764ac2e19f2a636f8ddc61b1cca3a2 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 19:39:39 +0200 Subject: [PATCH 1/8] Fix parsing of offset with colon and optional minutes --- src/format/scan.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/format/scan.rs b/src/format/scan.rs index 98f3673ff..a136f70ef 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -257,7 +257,14 @@ where s = &s[2..]; // colons (and possibly other separators) - s = consume_colon(s)?; + match (allow_missing_minutes, consume_colon(s)) { + (false, Err(e)) => return Err(e), + (true, Err(_)) => { + let seconds = hours * 3600; + return Ok((s, if negative { -seconds } else { seconds })); + } + (_, Ok(s_new)) => s = s_new, + } // minutes (00--59) // if the next two items are digits then we have to add minutes From f25c6768e4bbc22ea05f8a2d06e0dd20c91da6dc Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 09:43:57 +0200 Subject: [PATCH 2/8] Add ISO 8601 date parser --- src/format/mod.rs | 2 + src/format/parse.rs | 2 +- src/format/parse_iso8601.rs | 185 ++++++++++++++++++++++++++++++++++++ src/naive/date.rs | 47 ++++++++- 4 files changed, 231 insertions(+), 5 deletions(-) create mode 100644 src/format/parse_iso8601.rs diff --git a/src/format/mod.rs b/src/format/mod.rs index d22cdf071..0142a0e1f 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -44,6 +44,7 @@ mod parsed; // due to the size of parsing routines, they are in separate modules. 
mod parse; +pub(crate) mod parse_iso8601; pub(crate) mod scan; pub mod strftime; @@ -71,6 +72,7 @@ pub use locales::Locale; pub(crate) use locales::Locale; pub(crate) use parse::parse_rfc3339; pub use parse::{parse, parse_and_remainder}; +pub(crate) use parse_iso8601::parse_iso8601_date; pub use parsed::Parsed; pub use strftime::StrftimeItems; diff --git a/src/format/parse.rs b/src/format/parse.rs index 418fa424e..9b61d74f3 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -27,7 +27,7 @@ fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult< }) } -fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> { +pub(super) fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> { p.set_weekday(match v { 1 => Weekday::Mon, 2 => Weekday::Tue, diff --git a/src/format/parse_iso8601.rs b/src/format/parse_iso8601.rs new file mode 100644 index 000000000..141bbdde9 --- /dev/null +++ b/src/format/parse_iso8601.rs @@ -0,0 +1,185 @@ +use super::parse::set_weekday_with_number_from_monday; +use super::scan; +use super::{ParseResult, Parsed, TOO_SHORT}; + +#[derive(Copy, Clone, PartialEq, Eq)] +pub(crate) enum Iso8601Format { + Basic, + Extended, + Unknown, +} + +/// The ISO 8601 date format is a combination of 12 different date formats: +/// +/// | | calendar date | ordinal date | week date | +/// |-------------------------------------------|---------------|--------------|--------------| +/// | basic format | YYYYMMDD | YYYYDDD | YYYYWwwD | +/// | extended format | YYYY-MM-DD | YYYY-DDD | YYYY-Www-D | +/// | basic format, expanded representations | ±Y̲YYYYMMDD | ±Y̲YYYYDDD | ±Y̲YYYYWwwD | +/// | extended format, expanded representations | ±Y̲YYYY-MM-DD | ±Y̲YYYY-DDD | ±Y̲YYYY-Www-D | +/// +/// Returns `(remainder, Iso8601Format)`. +/// - The ISO 8601 format of the date is returned so the calling function can check it matches the +/// format of a time component (basic or extended format). 
+pub(crate) fn parse_iso8601_date<'a>( + parsed: &mut Parsed, + mut s: &'a str, +) -> ParseResult<(&'a str, Iso8601Format)> { + macro_rules! try_consume { + ($e:expr) => {{ + let (s_, v) = $e?; + s = s_; + v + }}; + } + + let year = try_consume!(parse_iso8601_year(s)); + + let extended_format = s.as_bytes().first() == Some(&b'-'); + if extended_format { + s = &s[1..]; + } + + if s.as_bytes().first() == Some(&b'W') { + // Week date. Basic format: `WwwD`. Extended format: `Www-D`. + parsed.set_isoyear(year)?; + parsed.set_isoweek(try_consume!(scan::number(&s[1..], 2, 2)))?; + if extended_format { + s = scan::char(s, b'-')?; + } + set_weekday_with_number_from_monday(parsed, try_consume!(scan::number(s, 1, 1)))?; + } else { + parsed.set_year(year)?; + let digits = s.as_bytes().iter().take_while(|c| c.is_ascii_digit()).count(); + if digits == 3 { + // Ordinal date. Format: `DDD` + parsed.set_ordinal(try_consume!(scan::number(s, 3, 3)))?; + } else { + // Calendar date. Basic format: `MMDD`. Extended format: `MM-DD`. + parsed.set_month(try_consume!(scan::number(s, 2, 2)))?; + if extended_format { + s = scan::char(s, b'-')?; + } + parsed.set_day(try_consume!(scan::number(s, 2, 2)))?; + } + } + let format = if extended_format { Iso8601Format::Extended } else { Iso8601Format::Basic }; + Ok((s, format)) +} + +fn parse_iso8601_year(mut s: &str) -> ParseResult<(&str, i64)> { + match s.as_bytes().first() { + Some(sign) if sign == &b'-' || sign == &b'+' => { + // expanded representation + let negative = sign == &b'-'; + s = &s[1..]; + let mut digits = s.as_bytes().iter().take_while(|c| c.is_ascii_digit()).count(); + if let Some(&b'-' | &b'W') = s.as_bytes().get(digits) { + // The date format is either an extended format with `-` as separator between date + // fields, or it is a week date in basic format. In both cases all counted digits + // belong to the year. 
+ if digits < 4 { + return Err(TOO_SHORT); + } + } else if digits == 7 { + digits -= 3; // must be the format ±YYYYDDD + } else if digits > 7 { + // The basic format with expanded representation of a calendar date (±Y̲YYYYMMDD) + // and ordinal date (±Y̲YYYYDDD) are ambiguous. In this case we assume a calendar + // date, where the last 4 digits are for the month and day. + digits -= 4; + } else { + return Err(TOO_SHORT); + } + let (s, year) = scan::number(s, 4, digits)?; + Ok((s, if negative { -year } else { year })) + } + Some(_) => scan::number(s, 4, 4), + None => Err(TOO_SHORT), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::format::INVALID; + use crate::NaiveDate; + + #[test] + fn test_parse_iso8601_date() { + use crate::Weekday::Fri; + fn parse(s: &str) -> ParseResult<(NaiveDate, &str)> { + let mut parsed = Parsed::new(); + let (s, _) = parse_iso8601_date(&mut parsed, s)?; + parsed.to_naive_date().map(|d| (d, s)) + } + + // calendar date, basic format + assert_eq!(parse("20230609 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + // calendar date, extended format + assert_eq!(parse("2023-06-09 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + // calendar date, basic format, expanded representation + assert_eq!(parse("-20230609 "), Ok((NaiveDate::from_ymd_opt(-2023, 6, 9).unwrap(), " "))); + assert_eq!(parse("+20230609 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + assert_eq!(parse("+020230609 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + assert_eq!(parse("+120230609 "), Ok((NaiveDate::from_ymd_opt(12023, 6, 9).unwrap(), " "))); + // calendar date, extended format, expanded representation + assert_eq!(parse("-2023-06-09 "), Ok((NaiveDate::from_ymd_opt(-2023, 6, 9).unwrap(), " "))); + assert_eq!(parse("+2023-06-09 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + assert_eq!(parse("+02023-06-09 "), Ok((NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), " "))); + 
assert_eq!(parse("+12023-06-09"), Ok((NaiveDate::from_ymd_opt(12023, 6, 9).unwrap(), ""))); + // mixed basic and extended format + assert_eq!(parse("2023-0609 "), Err(INVALID)); + assert_eq!(parse("202306-09 "), Err(INVALID)); + assert_eq!(parse("-2023-0609 "), Err(INVALID)); + // No padding + assert_eq!(parse("2023-6-09 "), Err(INVALID)); + assert_eq!(parse("2023-06-9 "), Err(INVALID)); + assert_eq!(parse("23-06-09 "), Err(INVALID)); + + // ordinal date, basic format + assert_eq!(parse("2023160 "), Ok((NaiveDate::from_yo_opt(2023, 160).unwrap(), " "))); + // ordinal date, extended format + assert_eq!(parse("2023-160 "), Ok((NaiveDate::from_yo_opt(2023, 160).unwrap(), " "))); + // ordinal date, basic format, expanded representation + assert_eq!(parse("-2023160 "), Ok((NaiveDate::from_yo_opt(-2023, 160).unwrap(), " "))); + assert_eq!(parse("+2023160 "), Ok((NaiveDate::from_yo_opt(2023, 160).unwrap(), " "))); + // ordinal date, extended format, expanded representation + assert_eq!(parse("-2023-160 "), Ok((NaiveDate::from_yo_opt(-2023, 160).unwrap(), " "))); + assert_eq!(parse("+2023-160 "), Ok((NaiveDate::from_yo_opt(2023, 160).unwrap(), " "))); + assert_eq!(parse("+02023-160 "), Ok((NaiveDate::from_yo_opt(2023, 160).unwrap(), " "))); + assert_eq!(parse("+12023-160 "), Ok((NaiveDate::from_yo_opt(12023, 160).unwrap(), " "))); + // ambiguous, interpreted as calendar date + assert!(parse("+02023160 ").is_err()); + assert!(parse("+12023160 ").is_err()); + // No padding + assert_eq!(parse("2023-16 "), Err(INVALID)); + assert_eq!(parse("2023-1 "), Err(INVALID)); + assert_eq!(parse("23-160 "), Err(INVALID)); + + let from_isoywd_opt = NaiveDate::from_isoywd_opt; + // week date, basic format + assert_eq!(parse("2023W235 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + // week date, extended format + assert_eq!(parse("2023-W23-5 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + // week date, basic format, expanded representation + assert_eq!(parse("-2023W235 
"), Ok((from_isoywd_opt(-2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+2023W235 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+02023W235 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+12023W235 "), Ok((from_isoywd_opt(12023, 23, Fri).unwrap(), " "))); + // calendar date, extended format, expanded representation + assert_eq!(parse("-2023-W23-5 "), Ok((from_isoywd_opt(-2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+2023-W23-5 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+02023-W23-5 "), Ok((from_isoywd_opt(2023, 23, Fri).unwrap(), " "))); + assert_eq!(parse("+12023-W23-5 "), Ok((from_isoywd_opt(12023, 23, Fri).unwrap(), " "))); + // mixed basic and extended format + assert_eq!(parse("2023-W235 "), Err(INVALID)); + assert_eq!(parse("202306-W235 "), Err(INVALID)); + assert_eq!(parse("-2023-W235 "), Err(INVALID)); + // No padding + assert_eq!(parse("2023-W25 "), Err(INVALID)); + assert_eq!(parse("23-W23-5 "), Err(INVALID)); + // Year is interpreted as `iso_year` + assert_eq!(parse("2022-W52-7 "), Ok((NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(), " "))); + } +} diff --git a/src/naive/date.rs b/src/naive/date.rs index 677c5fa6f..765a17d8f 100644 --- a/src/naive/date.rs +++ b/src/naive/date.rs @@ -19,10 +19,9 @@ use pure_rust_locales::Locale; use crate::duration::Duration as OldDuration; #[cfg(any(feature = "alloc", feature = "std"))] use crate::format::DelayedFormat; -use crate::format::{ - parse, parse_and_remainder, write_hundreds, Item, Numeric, Pad, ParseError, ParseResult, - Parsed, StrftimeItems, -}; +use crate::format::{parse, parse_and_remainder, parse_iso8601_date, write_hundreds}; +use crate::format::{Item, Numeric, Pad, StrftimeItems}; +use crate::format::{ParseError, ParseResult, Parsed}; use crate::month::Months; use crate::naive::{IsoWeek, NaiveDateTime, NaiveTime}; use crate::{expect, try_opt}; @@ -607,6 +606,46 @@ impl NaiveDate { 
parsed.to_naive_date().map(|d| (d, remainder)) } + /// Parses an ISO 8601 date string into a `NaiveDate` value. + /// + /// ISO 8601 allows representing values in a wide range of formats. See below for some examples. + /// + /// # Example + /// + /// ``` + /// # use chrono::{NaiveDate, Weekday}; + /// // calendar date, basic format + /// assert_eq!( + /// NaiveDate::parse_from_iso8601("20230609").unwrap(), + /// (NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), "") + /// ); + /// // calendar date, extended format + /// assert_eq!( + /// NaiveDate::parse_from_iso8601("2023-06-09").unwrap(), + /// (NaiveDate::from_ymd_opt(2023, 6, 9).unwrap(), "") + /// ); + /// // ordinal date, basic format + /// assert_eq!( + /// NaiveDate::parse_from_iso8601("2023160").unwrap(), + /// (NaiveDate::from_yo_opt(2023, 160).unwrap(), "") + /// ); + /// // week date, extended format + /// assert_eq!( + /// NaiveDate::parse_from_iso8601("2023-W23-5").unwrap(), + /// (NaiveDate::from_isoywd_opt(2023, 23, Weekday::Fri).unwrap(), "") + /// ); + /// // calendar date, extended format, expanded representation + /// assert_eq!( + /// NaiveDate::parse_from_iso8601("+12023-06-09").unwrap(), + /// (NaiveDate::from_ymd_opt(12023, 6, 9).unwrap(), "") + /// ); + /// ``` + pub fn parse_from_iso8601(s: &str) -> ParseResult<(NaiveDate, &str)> { + let mut parsed = Parsed::new(); + let (remainder, _) = parse_iso8601_date(&mut parsed, s)?; + parsed.to_naive_date().map(|d| (d, remainder)) + } + /// Add a duration in [`Months`] to the date /// /// Uses the last day of the month if the day does not exist in the resulting month. 
From 8aa2db946d2c4161841d783207be3564808a8f4e Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 19:50:30 +0200 Subject: [PATCH 3/8] Add helper type for parsing fractions --- src/format/parse_iso8601.rs | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/format/parse_iso8601.rs b/src/format/parse_iso8601.rs index 141bbdde9..fc32e3ba1 100644 --- a/src/format/parse_iso8601.rs +++ b/src/format/parse_iso8601.rs @@ -99,6 +99,74 @@ fn parse_iso8601_year(mut s: &str) -> ParseResult<(&str, i64)> { } } +/// Helper type for parsing fractional numbers. +/// +/// The fractions is stored as an integer in the range 0..=10^15. +/// With this limit `10^15 * 3600` fits in an `u64` without overflow. +/// +// We don't use `f64` to support targets that may not have floating point support. +struct Fraction(u64); + +impl Fraction { + /// Supported formats are `,fraction` and `.fraction`. + /// `fraction` can have an unlimited length. We only keep the first 15 digits, and look at the + /// 16th digit for correct rounding. + fn parse(mut s: &str) -> Option<(&str, Self)> { + s = match s.as_bytes().first() { + Some(&b',' | &b'.') => &s[1..], + _ => return None, + }; + let digits_in_fraction = s.as_bytes().iter().take_while(|c| c.is_ascii_digit()).count(); + let mut fraction = scan::number(s, 1, 15).map(|(_, f)| f).ok()? as u64; + if digits_in_fraction <= 15 { + fraction *= POW10[15 - digits_in_fraction]; + } else if s.as_bytes()[15] >= b'5' { + fraction += 1; + } + s = &s[digits_in_fraction..]; + Some((s, Fraction(fraction))) + } + + /// Returns the result of multiplying this `Fraction` with `unit`. + /// + /// Rounds to the nearest integer. + fn mul(&self, unit: u64) -> i64 { + assert!(unit <= 3600); // assumption to prevent overflow later. + ((self.0 * unit + (POW10[15] / 2 - 1)) / POW10[15]) as i64 + } + + /// Returns the result of multiplying this `Fraction` with `unit`. 
+ /// + /// Returns two integers to represent the whole number and the fraction as nanos. + fn mul_with_nanos(&self, unit: u64) -> (i64, i64) { + assert!(unit <= 3600); // assumption to prevent overflow later. + let div = POW10[15 - 9]; + let huge = self.0 * unit + (div / 2 - 1); + let whole = huge / POW10[15]; + let fraction_as_nanos = (huge % POW10[15]) / div; + (whole as i64, fraction_as_nanos as i64) + } +} + +const POW10: [u64; 16] = [ + 1, // 10^0; the index is the exponent + 10, + 100, + 1_000, + 10_000, + 100_000, + 1_000_000, + 10_000_000, + 100_000_000, + 1_000_000_000, + 10_000_000_000, + 100_000_000_000, + 1_000_000_000_000, + 10_000_000_000_000, + 100_000_000_000_000, + 1_000_000_000_000_000, +]; + #[cfg(test)] mod tests { use super::*; @@ -182,4 +250,19 @@ mod tests { // Year is interpreted as `iso_year` assert_eq!(parse("2022-W52-7 "), Ok((NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(), " "))); } + + #[test] + fn test_parse_fraction() { + let (_, fraction) = Fraction::parse(",123").unwrap(); + assert_eq!(fraction.0, 123_000_000_000_000); + let (_, fraction) = Fraction::parse(",123456789012345").unwrap(); + assert_eq!(fraction.0, 123_456_789_012_345); + let (_, fraction) = Fraction::parse(",1234567890123454").unwrap(); + assert_eq!(fraction.0, 123_456_789_012_345); + let (_, fraction) = Fraction::parse(",1234567890123455").unwrap(); + assert_eq!(fraction.0, 123_456_789_012_346); + + let (_, fraction) = Fraction::parse(",5").unwrap(); + assert_eq!(fraction.mul_with_nanos(1), (0, 500_000_000)); + } } From 544bd67c3877b4fad8b1aeb5b7ad7a260c1dc282 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 16 Sep 2023 13:12:21 +0200 Subject: [PATCH 4/8] Add ISO 8601 time parser --- src/format/mod.rs | 2 +- src/format/parse_iso8601.rs | 197 +++++++++++++++++++++++++++++++++++- src/naive/time/mod.rs | 44 +++++++- 3 files changed, 239 insertions(+), 4 deletions(-) diff --git a/src/format/mod.rs b/src/format/mod.rs index 0142a0e1f..3e9ed373d 100644 --- a/src/format/mod.rs 
+++ b/src/format/mod.rs @@ -72,7 +72,7 @@ pub use locales::Locale; pub(crate) use locales::Locale; pub(crate) use parse::parse_rfc3339; pub use parse::{parse, parse_and_remainder}; -pub(crate) use parse_iso8601::parse_iso8601_date; +pub(crate) use parse_iso8601::{parse_iso8601_date, parse_iso8601_time}; pub use parsed::Parsed; pub use strftime::StrftimeItems; diff --git a/src/format/parse_iso8601.rs b/src/format/parse_iso8601.rs index fc32e3ba1..a3853aca1 100644 --- a/src/format/parse_iso8601.rs +++ b/src/format/parse_iso8601.rs @@ -1,6 +1,6 @@ use super::parse::set_weekday_with_number_from_monday; use super::scan; -use super::{ParseResult, Parsed, TOO_SHORT}; +use super::{ParseResult, Parsed, INVALID, TOO_SHORT}; #[derive(Copy, Clone, PartialEq, Eq)] pub(crate) enum Iso8601Format { @@ -99,6 +99,144 @@ fn parse_iso8601_year(mut s: &str) -> ParseResult<(&str, i64)> { } } +/// The ISO 8601 time format has a basic and an extended format, representations with reduced +/// accuracy, and representations with a decimal fraction: +/// +/// | | basic format | extended format | +/// |-----------------------------------|--------------|-----------------| +/// | complete representation | hhmmss | hh:mm:ss | +/// | reduced accuracy: hour and minute | hhmm | hh:mm | +/// | reduced accuracy: hour | hh | | +/// | decimal fraction of the second | hhmmss,ss̲ | hh:mm:ss,ss̲ | +/// | decimal fraction of the minute | hhmm,mm̲ | hh:mm,mm̲ | +/// | decimal fraction of the hour | hh,hh̲ | hh,hh̲ | +/// +/// A decimal sign is either `,` or `.`; a `,` is preferred. A decimal fraction must have at least +/// one digit. The standard puts no limit on the number of digits. +/// +/// Midnight can be represented with both `00:00` (at the start of the day) and `24:00` (at the end +/// of the calendar day). +/// +/// Returns `(remainder, Iso8601Format, hour24)`. 
+/// - The ISO 8601 format of the time is returned so the calling function can check it matches the +/// format of a date component (basic or extended format). If there is a representation with the +/// accuracy reduced to hours, the format is `Unknown`. +/// - `24:00` can't be encoded in `Parsed`, so we encode it as `00:00` and return a `bool` to +/// indicate the date should wrap to the next day. +pub(crate) fn parse_iso8601_time<'a>( + parsed: &mut Parsed, + mut s: &'a str, +) -> ParseResult<(&'a str, Iso8601Format, bool)> { + use Iso8601Format::*; + + macro_rules! try_consume { + ($e:expr) => {{ + let (s_, v) = $e?; + s = s_; + v + }}; + } + + let mut format = Unknown; + let mut hour; + let mut minute = 0; + let mut second = 0; + let mut nanosecond = 0; + fn set_time_fields( + parsed: &mut Parsed, + hour: i64, + minute: i64, + second: i64, + nanosecond: i64, + ) -> ParseResult { + match hour < 24 { + true => parsed.set_hour(hour)?, + false => { + if !(hour == 24 && minute == 0 && second == 0 && nanosecond == 0) { + return Err(INVALID); + } + parsed.set_hour(0)?; + } + } + parsed.set_minute(minute)?; + parsed.set_second(second)?; + parsed.set_nanosecond(nanosecond)?; + Ok(hour == 24) + } + + hour = try_consume!(scan::number(s, 2, 2)); + + if let Some((s_, fraction)) = Fraction::parse(s) { + s = s_; + // Minute, second and nanosecond are expressed as a fraction of an hour. 
+ let (sec, nanos) = fraction.mul_with_nanos(3600); + minute = sec / 60; + second = sec % 60; + nanosecond = nanos; + return Ok((s, format, set_time_fields(parsed, hour, minute, second, nanosecond)?)); + } + + let c = s.as_bytes().first().unwrap_or(&b'a'); + if !(c.is_ascii_digit() || c == &b':') { + // Allow reduced accuracy + return Ok((s, format, set_time_fields(parsed, hour, minute, second, nanosecond)?)); + } + + format = if s.as_bytes().first() == Some(&b':') { Extended } else { Basic }; + if format == Extended { + s = &s[1..]; + } + minute = try_consume!(scan::number(s, 2, 2)); + + if let Some((s_, fraction)) = Fraction::parse(s) { + s = s_; + // Second and nanosecond are expressed as a fraction of a minute. + let (sec, nanos) = fraction.mul_with_nanos(60); + second = sec; + nanosecond = nanos; + if sec == 60 { + second = 0; + minute += 1; + if minute == 60 { + minute = 0; + hour += 1; + } + } + return Ok((s, format, set_time_fields(parsed, hour, minute, second, nanosecond)?)); + } + + let c = s.as_bytes().first().unwrap_or(&b'a'); + if !(c.is_ascii_digit() || (format == Extended && c == &b':')) { + // Allow reduced accuracy + return Ok((s, format, set_time_fields(parsed, hour, minute, second, nanosecond)?)); + } + + if format == Extended { + s = scan::char(s, b':')?; + } + second = try_consume!(scan::number(s, 2, 2)); + + if let Some((s_, fraction)) = Fraction::parse(s) { + s = s_; + // Nanoseconds are expressed as a fraction of a second. + let (sec_from_rounding, nanos) = fraction.mul_with_nanos(1); + nanosecond = nanos; + if sec_from_rounding != 0 { + if second < 59 { + second += 1; + } else { + second = 0; + minute += 1; + if minute == 60 { + minute = 0; + hour += 1; + } + } + } + } + Ok((s, format, set_time_fields(parsed, hour, minute, second, nanosecond)?)) +} + /// Helper type for parsing fractional numbers. /// /// The fractions is stored as an integer in the range 0..=10^15. 
@@ -251,6 +389,63 @@ mod tests { assert_eq!(parse("2022-W52-7 "), Ok((NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(), " "))); } + #[test] + fn test_parse_iso8601_time() { + fn parse(s: &str) -> ParseResult<(&str, u32, u32, u32, u32, bool)> { + let mut parsed = Parsed::new(); + let (s, _, hour24) = parse_iso8601_time(&mut parsed, s)?; + Ok(( + s, + 12 * parsed.hour_div_12.unwrap() + parsed.hour_mod_12.unwrap(), + parsed.minute.unwrap(), + parsed.second.unwrap_or(0), + parsed.nanosecond.unwrap_or(0), + hour24, + )) + } + + // basic format, complete representation + assert_eq!(parse("152830 "), Ok((" ", 15, 28, 30, 0, false))); + // extended format, complete representation + assert_eq!(parse("15:28:30 "), Ok((" ", 15, 28, 30, 0, false))); + // basic format, fractional second + assert_eq!(parse("152830,6 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + assert_eq!(parse("152830.60 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + assert_eq!(parse("152830.999999999 "), Ok((" ", 15, 28, 30, 999_999_999, false))); + assert_eq!(parse("152830.9999999999 "), Ok((" ", 15, 28, 31, 0, false))); + // extended format, fractional second + assert_eq!(parse("15:28:30,6 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + assert_eq!(parse("15:28:30.60 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + // basic format, fractional minute + assert_eq!(parse("1528,5 "), Ok((" ", 15, 28, 30, 0, false))); + assert_eq!(parse("1528.51 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + // extended format, fractional minute + assert_eq!(parse("15:28,5 "), Ok((" ", 15, 28, 30, 0, false))); + assert_eq!(parse("15:28.51 "), Ok((" ", 15, 28, 30, 600_000_000, false))); + assert_eq!(parse("15:59.999999999999 "), Ok((" ", 16, 0, 0, 0, false))); + // extended format, fractional hour + assert_eq!(parse("15,45 "), Ok((" ", 15, 27, 0, 0, false))); + assert_eq!(parse("15.12345 "), Ok((" ", 15, 7, 24, 420_000_000, false))); + assert_eq!(parse("15,999999999999 "), Ok((" ", 15, 59, 59, 999_999_996, false))); + 
assert_eq!(parse("15,9999999999999 "), Ok((" ", 15, 60, 0, 0, false))); + + // 24:00:00 is allowed + assert_eq!(parse("240000 "), Ok((" ", 0, 0, 0, 0, true))); + assert_eq!(parse("24:00:00 "), Ok((" ", 0, 0, 0, 0, true))); + assert_eq!(parse("24:00:00,0 "), Ok((" ", 0, 0, 0, 0, true))); + // But no times beyond that + assert_eq!(parse("24:30:00 "), Err(INVALID)); + assert_eq!(parse("24:00:30 "), Err(INVALID)); + assert_eq!(parse("24:00:00,5 "), Err(INVALID)); + assert_eq!(parse("24.99 "), Err(INVALID)); + assert_eq!(parse("24,9999999999999 "), Err(INVALID)); // rounds to 25:00:00 + + // Reduced accuracy + assert_eq!(parse("1528 "), Ok((" ", 15, 28, 0, 0, false))); + assert_eq!(parse("15:28 "), Ok((" ", 15, 28, 0, 0, false))); + assert_eq!(parse("15 "), Ok((" ", 15, 0, 0, 0, false))); + } + #[test] fn test_parse_fraction() { let (_, fraction) = Fraction::parse(",123").unwrap(); diff --git a/src/naive/time/mod.rs b/src/naive/time/mod.rs index da35a4964..6c42c77ca 100644 --- a/src/naive/time/mod.rs +++ b/src/naive/time/mod.rs @@ -16,8 +16,8 @@ use crate::duration::Duration as OldDuration; #[cfg(any(feature = "alloc", feature = "std"))] use crate::format::DelayedFormat; use crate::format::{ - parse, parse_and_remainder, write_hundreds, Fixed, Item, Numeric, Pad, ParseError, ParseResult, - Parsed, StrftimeItems, + parse, parse_and_remainder, parse_iso8601_time, write_hundreds, Fixed, Item, Numeric, Pad, + ParseError, ParseResult, Parsed, StrftimeItems, }; use crate::Timelike; use crate::{expect, try_opt}; @@ -558,6 +558,46 @@ impl NaiveTime { parsed.to_naive_time().map(|t| (t, remainder)) } + /// Parses an ISO 8601 time string into a `NaiveTime` value. + /// + /// ISO 8601 allows representing values in a wide range of formats. See below for some examples. 
+ /// + /// # Example + /// + /// ``` + /// # use chrono::NaiveTime; + /// // complete representation, basic format + /// assert_eq!( + /// NaiveTime::parse_from_iso8601("101530").unwrap(), + /// (NaiveTime::from_hms_opt(10, 15, 30).unwrap(), "") + /// ); + /// // reduced representation, extended format + /// assert_eq!( + /// NaiveTime::parse_from_iso8601("10:15").unwrap(), + /// (NaiveTime::from_hms_opt(10, 15, 0).unwrap(), "") + /// ); + /// // time with fraction of a second, extended format, `,` as decimal sign + /// assert_eq!( + /// NaiveTime::parse_from_iso8601("10:15:30,25").unwrap(), + /// (NaiveTime::from_hms_milli_opt(10, 15, 30, 250).unwrap(), "") + /// ); + /// // time with fraction of an hour, `.` as decimal sign + /// assert_eq!( + /// NaiveTime::parse_from_iso8601("10.25").unwrap(), + /// (NaiveTime::from_hms_opt(10, 15, 0).unwrap(), "") + /// ); + /// // `24:00`, midnight at the end of the day, parses as `00:00`. + /// assert_eq!( + /// NaiveTime::parse_from_iso8601("24:00:00").unwrap(), + /// (NaiveTime::from_hms_opt(0, 0, 0).unwrap(), "") + /// ); + /// ``` + pub fn parse_from_iso8601(s: &str) -> ParseResult<(NaiveTime, &str)> { + let mut parsed = Parsed::new(); + let (remainder, _, _) = parse_iso8601_time(&mut parsed, s)?; + parsed.to_naive_time().map(|t| (t, remainder)) + } + /// Adds given `Duration` to the current time, and also returns the number of *seconds* /// in the integral number of days ignored from the addition. 
/// From 1072df99a7c5aac48c0173d8e702152fa22154dc Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 19:50:30 +0200 Subject: [PATCH 5/8] Add ISO 8601 `NaiveDateTime` parser --- src/format/mod.rs | 2 +- src/format/parse_iso8601.rs | 25 ++++++++++++++++++- src/naive/datetime/mod.rs | 50 +++++++++++++++++++++++++++++++++++-- src/naive/datetime/tests.rs | 20 +++++++++++++++ 4 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/format/mod.rs b/src/format/mod.rs index 3e9ed373d..eb9af2b53 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -72,7 +72,7 @@ pub use locales::Locale; pub(crate) use locales::Locale; pub(crate) use parse::parse_rfc3339; pub use parse::{parse, parse_and_remainder}; -pub(crate) use parse_iso8601::{parse_iso8601_date, parse_iso8601_time}; +pub(crate) use parse_iso8601::{parse_iso8601_date, parse_iso8601_datetime, parse_iso8601_time}; pub use parsed::Parsed; pub use strftime::StrftimeItems; diff --git a/src/format/parse_iso8601.rs b/src/format/parse_iso8601.rs index a3853aca1..49ed8d6f7 100644 --- a/src/format/parse_iso8601.rs +++ b/src/format/parse_iso8601.rs @@ -1,6 +1,7 @@ use super::parse::set_weekday_with_number_from_monday; use super::scan; -use super::{ParseResult, Parsed, INVALID, TOO_SHORT}; +use super::{ParseResult, Parsed, INVALID, OUT_OF_RANGE, TOO_SHORT}; +use crate::{Days, NaiveDateTime}; #[derive(Copy, Clone, PartialEq, Eq)] pub(crate) enum Iso8601Format { @@ -9,6 +10,28 @@ pub(crate) enum Iso8601Format { Unknown, } +/// Returns `(NaiveDateTime, remainder, Iso8601Format)`. +/// - This method returns a `NaiveDateTime` instead of working with `Parsed` because `Parsed` can't +/// handle a time of `24:00:00` (which should parse to `00:00:00` the next day). +/// - The ISO 8601 format of the date and time is returned so the calling function can check it +/// matches the format of a offset component (basic or extended format). 
+pub(crate) fn parse_iso8601_datetime(s: &str) -> ParseResult<(NaiveDateTime, &str, Iso8601Format)> { + let mut parsed = Parsed::new(); + + let (s, date_format) = parse_iso8601_date(&mut parsed, s)?; + let s = scan::char(s, b'T')?; + let (s, time_format, hour24) = parse_iso8601_time(&mut parsed, s)?; + if time_format != Iso8601Format::Unknown && date_format != time_format { + return Err(INVALID); + } + + let mut dt = parsed.to_naive_datetime_with_offset(0)?; + if hour24 { + dt = dt.checked_add_days(Days::new(1)).ok_or(OUT_OF_RANGE)?; + } + Ok((dt, s, time_format)) +} + /// The ISO 8601 date format is a combination of 12 different date formats: /// /// | | calendar date | ordinal date | week date | diff --git a/src/naive/datetime/mod.rs b/src/naive/datetime/mod.rs index cd41207ce..95eea8851 100644 --- a/src/naive/datetime/mod.rs +++ b/src/naive/datetime/mod.rs @@ -16,8 +16,10 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::duration::Duration as OldDuration; #[cfg(any(feature = "alloc", feature = "std"))] use crate::format::DelayedFormat; -use crate::format::{parse, parse_and_remainder, ParseError, ParseResult, Parsed, StrftimeItems}; -use crate::format::{Fixed, Item, Numeric, Pad}; +use crate::format::{ + parse, parse_and_remainder, parse_iso8601_datetime, Fixed, Item, Numeric, Pad, ParseError, + ParseResult, Parsed, StrftimeItems, +}; use crate::naive::{Days, IsoWeek, NaiveDate, NaiveTime}; use crate::offset::Utc; use crate::{expect, DateTime, Datelike, LocalResult, Months, TimeZone, Timelike, Weekday}; @@ -339,6 +341,50 @@ impl NaiveDateTime { parsed.to_naive_datetime_with_offset(0).map(|d| (d, remainder)) // no offset adjustment } + /// Parses an ISO 8601 date and time string into a `NaiveDateTime` value. + /// + /// ISO 8601 allows representing values in a wide range of formats. See below for some examples. 
+ /// + /// # Example + /// + /// ``` + /// # use chrono::{NaiveDate, NaiveDateTime, Weekday}; + /// // calendar date, regular time, basic format + /// assert_eq!( + /// NaiveDateTime::parse_from_iso8601("20230609T101530").unwrap(), + /// (NaiveDate::from_ymd_opt(2023, 6, 9).unwrap().and_hms_opt(10, 15, 30).unwrap(), "") + /// ); + /// // calendar date, regular time, extended format + /// assert_eq!( + /// NaiveDateTime::parse_from_iso8601("2023-06-09T10:15:30").unwrap(), + /// (NaiveDate::from_ymd_opt(2023, 6, 9).unwrap().and_hms_opt(10, 15, 30).unwrap(), "") + /// ); + /// // ordinal date, time with fraction of a second, extended format, `,` as decimal sign + /// assert_eq!( + /// NaiveDateTime::parse_from_iso8601("2023-160T10:15:30,25").unwrap(), + /// (NaiveDate::from_yo_opt(2023, 160) + /// .unwrap() + /// .and_hms_milli_opt(10, 15, 30, 250) + /// .unwrap(), "") + /// ); + /// // week date, time with fraction of an hour, basic format, `.` as decimal sign + /// assert_eq!( + /// NaiveDateTime::parse_from_iso8601("2023W235T10.25").unwrap(), + /// (NaiveDate::from_isoywd_opt(2023, 23, Weekday::Fri) + /// .unwrap() + /// .and_hms_opt(10, 15, 0) + /// .unwrap(), "") + /// ); + /// // calendar date, regular time, extended format, expanded representation + /// assert_eq!( + /// NaiveDateTime::parse_from_iso8601("+12023-06-09T10:15:30").unwrap(), + /// (NaiveDate::from_ymd_opt(12023, 6, 9).unwrap().and_hms_opt(10, 15, 30).unwrap(), "") + /// ); + /// ``` + pub fn parse_from_iso8601(s: &str) -> ParseResult<(NaiveDateTime, &str)> { + parse_iso8601_datetime(s).map(|(dt, remainder, _)| (dt, remainder)) + } + /// Retrieves a date component. 
/// /// # Example diff --git a/src/naive/datetime/tests.rs b/src/naive/datetime/tests.rs index 39187de67..6753e97b0 100644 --- a/src/naive/datetime/tests.rs +++ b/src/naive/datetime/tests.rs @@ -433,3 +433,23 @@ fn test_and_utc() { assert_eq!(dt_utc.naive_local(), ndt); assert_eq!(dt_utc.timezone(), Utc); } + +#[test] +fn test_parse_from_iso8601() { + let parse = |s| NaiveDateTime::parse_from_iso8601(s).map(|(dt, _)| dt); + let datetime = |y, m, d, h, n, s, nano| { + NaiveDate::from_ymd_opt(y, m, d).unwrap().and_hms_nano_opt(h, n, s, nano).unwrap() + }; + + // Taken from ISO 8601 + assert_eq!(parse("19850412T101530"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0))); + assert_eq!(parse("1985-04-12T10:15:30"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0))); + assert_eq!(parse("19850412T1015"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + assert_eq!(parse("1985-04-12T10:15"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + assert_eq!(parse("1985102T1015"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + assert_eq!(parse("1985-102T10:15"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + assert_eq!(parse("1985W155T1015"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + assert_eq!(parse("1985-W15-5T10:15"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0))); + // Test 24:00:00 wraps to the next day + assert_eq!(parse("2023-06-09T24:00:00"), Ok(datetime(2023, 6, 10, 0, 0, 0, 0))); +} From 6abc6f62868866e5a440e256b2e4af528aa5caa3 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 19:52:30 +0200 Subject: [PATCH 6/8] Add ISO 8601 `DateTime` parser --- src/datetime/mod.rs | 50 +++++++++++++++++++++++++++++++++++-- src/datetime/tests.rs | 30 ++++++++++++++++++++++ src/format/mod.rs | 4 ++- src/format/parse_iso8601.rs | 16 +++++++++++- 4 files changed, 96 insertions(+), 4 deletions(-) diff --git a/src/datetime/mod.rs b/src/datetime/mod.rs index 0df12bc36..b26d3eaa3 100644 --- a/src/datetime/mod.rs +++ b/src/datetime/mod.rs @@ -18,8 +18,8 @@ use crate::duration::Duration as OldDuration; #[cfg(feature = 
"unstable-locales")] use crate::format::Locale; use crate::format::{ - parse, parse_and_remainder, parse_rfc3339, Fixed, Item, ParseError, ParseResult, Parsed, - StrftimeItems, TOO_LONG, + parse, parse_and_remainder, parse_iso8601, parse_rfc3339, Fixed, Item, ParseError, ParseResult, + Parsed, StrftimeItems, TOO_LONG, }; #[cfg(any(feature = "alloc", feature = "std"))] use crate::format::{write_rfc3339, DelayedFormat}; @@ -790,6 +790,52 @@ impl DateTime { parsed.to_datetime() } + /// Parses an ISO 8601 date-and-time string into a `DateTime` value. + /// + /// ISO 8601 allows representing values in a wide range of formats. Some valid ISO 8601 values + /// are also valid RFC 3339 values. + /// + /// # Example + /// + /// ``` + /// # use chrono::{DateTime, FixedOffset, NaiveDate, TimeZone, Weekday}; + /// // calendar date, regular time, basic format + /// assert_eq!( + /// DateTime::parse_from_iso8601("20230609T101530Z").unwrap(), + /// (FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 6, 9, 10, 15, 30).unwrap(), + /// "") + /// ); + /// // calendar date, regular time, extended format (is also valid RFC 3339) + /// assert_eq!( + /// DateTime::parse_from_iso8601("2023-06-09T10:15:30Z").unwrap(), + /// (FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 6, 9, 10, 15, 30).unwrap(), + /// "") + /// ); + /// // ordinal date, time with fraction of a second, extended format, `,` as decimal sign + /// assert_eq!( + /// DateTime::parse_from_iso8601("2023-160T10:15:30,25+01:00").unwrap(), + /// (NaiveDate::from_yo_opt(2023, 160) + /// .unwrap() + /// .and_hms_milli_opt(10, 15, 30, 250) + /// .unwrap() + /// .and_local_timezone(FixedOffset::east_opt(1 * 3600).unwrap()) + /// .unwrap(), "") + /// ); + /// // week date, time with fraction of an hour, basic format, `.` as decimal sign + /// assert_eq!( + /// DateTime::parse_from_iso8601("2023W235T10.25-01").unwrap(), + /// (NaiveDate::from_isoywd_opt(2023, 23, Weekday::Fri) + /// .unwrap() + /// .and_hms_opt(10, 15, 0) 
+ /// .unwrap() + /// .and_local_timezone(FixedOffset::east_opt(-1 * 3600).unwrap()) + /// .unwrap(), "") + /// ); + /// ``` + pub fn parse_from_iso8601(s: &str) -> ParseResult<(DateTime<FixedOffset>, &str)> { + parse_iso8601(s) + } + /// Parses a string from a user-specified format into a `DateTime<FixedOffset>` value. /// /// Note that this method *requires a timezone* in the input string. See diff --git a/src/datetime/tests.rs b/src/datetime/tests.rs index 63dfe59b4..c0560c3e5 100644 --- a/src/datetime/tests.rs +++ b/src/datetime/tests.rs @@ -1519,3 +1519,33 @@ fn nano_roundrip() { assert_eq!(nanos, nanos2); } } + +#[test] +fn test_parse_from_iso8601() { + let parse = |s| DateTime::<FixedOffset>::parse_from_iso8601(s).map(|(dt, _)| dt); + let datetime = |y, m, d, h, n, s, nano, o| { + FixedOffset::east_opt(o) + .unwrap() + .with_ymd_and_hms(y, m, d, h, n, s) + .unwrap() + .with_nanosecond(nano) + .unwrap() + }; + + // Taken from ISO 8601 + assert_eq!(parse("19850412T101530Z"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, 0))); + assert_eq!(parse("19850412T101530+0400"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, 14400))); + assert_eq!(parse("19850412T101530-04"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, -14400))); + assert_eq!(parse("1985-04-12T10:15:30Z"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, 0))); + assert_eq!(parse("1985-04-12T10:15:30+04:00"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, 14400))); + assert_eq!(parse("1985-04-12T10:15:30-04"), Ok(datetime(1985, 4, 12, 10, 15, 30, 0, -14400))); + assert_eq!(parse("1985W155T1015+0400"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0, 14400))); + assert_eq!(parse("1985-W15-5T10:15+04"), Ok(datetime(1985, 4, 12, 10, 15, 0, 0, 14400))); + assert_eq!(parse("1985102T235030Z"), Ok(datetime(1985, 4, 12, 23, 50, 30, 0, 0))); + // With fractions + assert_eq!( + parse("1985102T235030,5+01"), + Ok(datetime(1985, 4, 12, 23, 50, 30, 500_000_000, 3600)) + ); + assert_eq!(parse("1985102T2350,5+01"), Ok(datetime(1985, 4, 12, 23, 50, 30, 0, 3600))); +} diff --git a/src/format/mod.rs
b/src/format/mod.rs index eb9af2b53..5a45ba171 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -72,7 +72,9 @@ pub use locales::Locale; pub(crate) use locales::Locale; pub(crate) use parse::parse_rfc3339; pub use parse::{parse, parse_and_remainder}; -pub(crate) use parse_iso8601::{parse_iso8601_date, parse_iso8601_datetime, parse_iso8601_time}; +pub(crate) use parse_iso8601::{ + parse_iso8601, parse_iso8601_date, parse_iso8601_datetime, parse_iso8601_time, +}; pub use parsed::Parsed; pub use strftime::StrftimeItems; diff --git a/src/format/parse_iso8601.rs b/src/format/parse_iso8601.rs index 49ed8d6f7..6b3fb58f7 100644 --- a/src/format/parse_iso8601.rs +++ b/src/format/parse_iso8601.rs @@ -1,7 +1,7 @@ use super::parse::set_weekday_with_number_from_monday; use super::scan; use super::{ParseResult, Parsed, INVALID, OUT_OF_RANGE, TOO_SHORT}; -use crate::{Days, NaiveDateTime}; +use crate::{DateTime, Days, FixedOffset, NaiveDateTime}; #[derive(Copy, Clone, PartialEq, Eq)] pub(crate) enum Iso8601Format { @@ -10,6 +10,20 @@ pub(crate) enum Iso8601Format { Unknown, } +/// Returns `(DateTime<FixedOffset>, remainder)`. +pub(crate) fn parse_iso8601(s: &str) -> ParseResult<(DateTime<FixedOffset>, &str)> { + let (dt, s, format) = parse_iso8601_datetime(s)?; + + let (s, offset) = if format == Iso8601Format::Extended { + scan::timezone_offset(s, |s| scan::char(s, b':'), true, true, true)? + } else { + scan::timezone_offset(s, |s| Ok(s), true, true, true)? + }; + let offset = FixedOffset::east_opt(offset).ok_or(OUT_OF_RANGE)?; + + dt.and_local_timezone(offset).single().ok_or(OUT_OF_RANGE).map(|dt| (dt, s)) +} + /// Returns `(NaiveDateTime, remainder, Iso8601Format)`. /// - This method returns a `NaiveDateTime` instead of working with `Parsed` because `Parsed` can't /// handle a time of `24:00:00` (which should parse to `00:00:00` the next day).
From 4231047b4824f76e6d738d604dc8465afbbaf464 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 9 Jun 2023 20:16:04 +0200 Subject: [PATCH 7/8] Test ISO 8601 parser can parse `Debug` output --- src/datetime/tests.rs | 9 +++++++++ src/naive/datetime/tests.rs | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/datetime/tests.rs b/src/datetime/tests.rs index c0560c3e5..ecee72ae3 100644 --- a/src/datetime/tests.rs +++ b/src/datetime/tests.rs @@ -1549,3 +1549,12 @@ fn test_parse_from_iso8601() { ); assert_eq!(parse("1985102T2350,5+01"), Ok(datetime(1985, 4, 12, 23, 50, 30, 0, 3600))); } + +#[test] +fn test_iso8601_parses_debug() { + let parse = |s| DateTime::<FixedOffset>::parse_from_iso8601(s).map(|(dt, _)| dt); + + let dt = FixedOffset::east_opt(3600).unwrap().with_ymd_and_hms(12345, 6, 7, 8, 9, 10).unwrap(); + let debug = format!("{:?}", dt); + assert_eq!(parse(&debug), Ok(dt)); +} diff --git a/src/naive/datetime/tests.rs b/src/naive/datetime/tests.rs index 6753e97b0..a9258471c 100644 --- a/src/naive/datetime/tests.rs +++ b/src/naive/datetime/tests.rs @@ -453,3 +453,12 @@ fn test_parse_from_iso8601() { // Test 24:00:00 wraps to the next day assert_eq!(parse("2023-06-09T24:00:00"), Ok(datetime(2023, 6, 10, 0, 0, 0, 0))); } + +#[test] +fn test_iso8601_parses_debug() { + let parse = |s| NaiveDateTime::parse_from_iso8601(s).map(|(dt, _)| dt); + + let dt = NaiveDate::from_ymd_opt(12345, 6, 7).unwrap().and_hms_nano_opt(8, 9, 10, 11).unwrap(); + let debug = format!("{:?}", dt); + assert_eq!(parse(&debug), Ok(dt)); +} From afa45c6332d29e53f6c52fc24c3e31ffa787339e Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 10 Jun 2023 07:55:02 +0200 Subject: [PATCH 8/8] Adjust documentation of `DateTime::parse_from_rfc3339` --- src/datetime/mod.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/datetime/mod.rs b/src/datetime/mod.rs index b26d3eaa3..379bc7d04 100644 --- a/src/datetime/mod.rs +++ b/src/datetime/mod.rs @@
-770,17 +770,24 @@ impl DateTime<FixedOffset> { /// Parses an RFC 3339 date-and-time string into a `DateTime<FixedOffset>` value. /// - /// Parses all valid RFC 3339 values (as well as the subset of valid ISO 8601 values that are - /// also valid RFC 3339 date-and-time values) and returns a new [`DateTime`] with a - /// [`FixedOffset`] corresponding to the parsed timezone. While RFC 3339 values come in a wide - /// variety of shapes and sizes, `1996-12-19T16:39:57-08:00` is an example of the most commonly - /// encountered variety of RFC 3339 formats. - /// - /// Why isn't this named `parse_from_iso8601`? That's because ISO 8601 allows representing - /// values in a wide range of formats, only some of which represent actual date-and-time - /// instances (rather than periods, ranges, dates, or times). Some valid ISO 8601 values are - /// also simultaneously valid RFC 3339 values, but not all RFC 3339 values are valid ISO 8601 - /// values (or the other way around). + /// This parses valid RFC 3339 datetime strings (such as `1996-12-19T16:39:57-08:00`) + /// and returns a new [`DateTime`] instance with the parsed timezone as the [`FixedOffset`]. + /// + /// RFC 3339 is a clearly defined subset or profile of ISO 8601. + /// + /// # Example + /// + /// ``` + /// # use chrono::{DateTime, FixedOffset, TimeZone}; + /// assert_eq!( + /// DateTime::parse_from_rfc3339("1996-12-19T16:39:57-08:00").unwrap(), + /// FixedOffset::east_opt(-8 * 3600).unwrap().with_ymd_and_hms(1996, 12, 19, 16, 39, 57).unwrap() + /// ); + /// assert_eq!( + /// DateTime::parse_from_rfc3339("2023-06-10T07:15:00Z").unwrap(), + /// FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 6, 10, 7, 15, 0).unwrap() + /// ); + /// ``` pub fn parse_from_rfc3339(s: &str) -> ParseResult<DateTime<FixedOffset>> { let mut parsed = Parsed::new(); let (s, _) = parse_rfc3339(&mut parsed, s)?;