jiff/fmt/
offset.rs

1/*!
2This module provides facilities for parsing time zone offsets.
3
4The parsing here follows primarily from [RFC 3339] and [ISO 8601], but also
5from [Temporal's hybrid grammar].
6
7[RFC 3339]: https://www.rfc-editor.org/rfc/rfc3339
8[ISO 8601]: https://www.iso.org/iso-8601-date-and-time-format.html
9[Temporal's hybrid grammar]: https://tc39.es/proposal-temporal/#sec-temporal-iso8601grammar
10*/
11
12// Here's the specific part of Temporal's grammar that is implemented below:
13//
14// DateTimeUTCOffset :::
15//   UTCDesignator
16//   UTCOffsetSubMinutePrecision
17//
18// TimeZoneUTCOffsetName :::
19//   UTCOffsetMinutePrecision
20//
21// UTCDesignator ::: one of
22//   Z z
23//
24// UTCOffsetSubMinutePrecision :::
25//   UTCOffsetMinutePrecision
26//   UTCOffsetWithSubMinuteComponents[+Extended]
27//   UTCOffsetWithSubMinuteComponents[~Extended]
28//
29// UTCOffsetMinutePrecision :::
30//   TemporalSign Hour
31//   TemporalSign Hour TimeSeparator[+Extended] MinuteSecond
32//   TemporalSign Hour TimeSeparator[~Extended] MinuteSecond
33//
34// UTCOffsetWithSubMinuteComponents[Extended] :::
35//   TemporalSign Hour
36//     TimeSeparator[?Extended] MinuteSecond
37//     TimeSeparator[?Extended] MinuteSecond
38//     TemporalDecimalFraction[opt]
39//
40// TimeSeparator[Extended] :::
41//   [+Extended] :
42//   [~Extended] [empty]
43//
44// TemporalSign :::
45//   ASCIISign
46//   <MINUS>
47//
48// ASCIISign ::: one of
49//   + -
50//
51// Hour :::
52//   0 DecimalDigit
53//   1 DecimalDigit
54//   20
55//   21
56//   22
57//   23
58//
59// MinuteSecond :::
60//   0 DecimalDigit
61//   1 DecimalDigit
62//   2 DecimalDigit
63//   3 DecimalDigit
64//   4 DecimalDigit
65//   5 DecimalDigit
66//
67// DecimalDigit :: one of
68//   0 1 2 3 4 5 6 7 8 9
69//
70// TemporalDecimalFraction :::
71//   TemporalDecimalSeparator DecimalDigit
72//   TemporalDecimalSeparator DecimalDigit DecimalDigit
73//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
74//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
75//                            DecimalDigit
76//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
77//                            DecimalDigit DecimalDigit
78//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
79//                            DecimalDigit DecimalDigit DecimalDigit
80//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
81//                            DecimalDigit DecimalDigit DecimalDigit
82//                            DecimalDigit
83//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
84//                            DecimalDigit DecimalDigit DecimalDigit
85//                            DecimalDigit DecimalDigit
86//   TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit
87//                            DecimalDigit DecimalDigit DecimalDigit
88//                            DecimalDigit DecimalDigit DecimalDigit
//
// TemporalDecimalSeparator ::: one of
//   . ,
91//
92// The quick summary of the above is that offsets up to nanosecond precision
93// are supported. The general format is `{+,-}HH[:MM[:SS[.NNNNNNNNN]]]`. But
94// ISO 8601 extended or basic formats are also supported. For example, the
95// basic format `-0530` is equivalent to the extended format `-05:30`.
96//
97// Note that even though we support parsing up to nanosecond precision, Jiff
98// currently only supports offsets up to second precision. I don't think there
99// is any real practical need for any greater precision, but I don't think it
// would be too hard to switch an `Offset` from an `i32` representation in
// seconds to an `i64` representation in nanoseconds. (Since it only needs to
// support a span of time of about 52 hours or so.)
103
104use crate::{
105    error::{err, Error, ErrorContext},
106    fmt::{
107        temporal::{PiecesNumericOffset, PiecesOffset},
108        util::{parse_temporal_fraction, FractionalFormatter},
109        Parsed,
110    },
111    tz::Offset,
112    util::{
113        escape, parse,
114        rangeint::{ri8, RFrom},
115        t::{self, C},
116    },
117};
118
// We define our own ranged types because we want them to only be positive. We
// represent the sign explicitly as a separate field. But the range supported
// is the same as the component fields of `Offset`.
//
// Each alias below spans `0` up to the `MAX` of the corresponding signed
// offset component type.
/// The unsigned hours component of a parsed UTC offset.
type ParsedOffsetHours = ri8<0, { t::SpanZoneOffsetHours::MAX }>;
/// The unsigned minutes component of a parsed UTC offset.
type ParsedOffsetMinutes = ri8<0, { t::SpanZoneOffsetMinutes::MAX }>;
/// The unsigned seconds component of a parsed UTC offset.
type ParsedOffsetSeconds = ri8<0, { t::SpanZoneOffsetSeconds::MAX }>;
125
/// An offset that has been parsed from a datetime string.
///
/// This represents either a Zulu offset (corresponding to UTC with an unknown
/// time zone offset), or a specific numeric offset given in hours, minutes,
/// seconds and nanoseconds (with everything except hours being optional).
///
/// Values of this type are produced by `Parser::parse` and
/// `Parser::parse_optional`. Use `to_offset` or `to_pieces_offset` to turn
/// the parsed representation into an actual offset value.
#[derive(Debug)]
pub(crate) struct ParsedOffset {
    /// The kind of offset parsed: either the Zulu designator or an explicit
    /// numeric offset.
    kind: ParsedOffsetKind,
}
136
137impl ParsedOffset {
138    /// Convert a parsed offset into a Jiff offset.
139    ///
140    /// If the offset was parsed from a Zulu designator, then the offset
141    /// returned is indistinguishable from `+00` or `-00`.
142    ///
143    /// # Errors
144    ///
145    /// A variety of parsing errors are possible.
146    ///
147    /// Also, beyond normal range checks on the allowed components of a UTC
148    /// offset, this does rounding based on the fractional nanosecond part. As
149    /// a result, if the parsed value would be rounded to a value not in bounds
150    /// for a Jiff offset, this returns an error.
151    pub(crate) fn to_offset(&self) -> Result<Offset, Error> {
152        match self.kind {
153            ParsedOffsetKind::Zulu => Ok(Offset::UTC),
154            ParsedOffsetKind::Numeric(ref numeric) => numeric.to_offset(),
155        }
156    }
157
158    /// Convert a parsed offset to a more structured representation.
159    ///
160    /// This is like `to_offset`, but preserves `Z` and `-00:00` versus
161    /// `+00:00`. This does still attempt to create an `Offset`, and that
162    /// construction can fail.
163    pub(crate) fn to_pieces_offset(&self) -> Result<PiecesOffset, Error> {
164        match self.kind {
165            ParsedOffsetKind::Zulu => Ok(PiecesOffset::Zulu),
166            ParsedOffsetKind::Numeric(ref numeric) => {
167                let mut off = PiecesNumericOffset::from(numeric.to_offset()?);
168                if numeric.sign < C(0) {
169                    off = off.with_negative_zero();
170                }
171                Ok(PiecesOffset::from(off))
172            }
173        }
174    }
175
176    /// Whether this parsed offset corresponds to Zulu time or not.
177    ///
178    /// This is useful in error reporting for parsing civil times. Namely, we
179    /// report an error when parsing a civil time with a Zulu offset since it
180    /// is almost always the wrong thing to do.
181    pub(crate) fn is_zulu(&self) -> bool {
182        matches!(self.kind, ParsedOffsetKind::Zulu)
183    }
184
185    /// Whether the parsed offset had an explicit sub-minute component or not.
186    pub(crate) fn has_subminute(&self) -> bool {
187        let ParsedOffsetKind::Numeric(ref numeric) = self.kind else {
188            return false;
189        };
190        numeric.seconds.is_some()
191    }
192}
193
/// The kind of a parsed offset.
///
/// This is kept private to this module; callers interact with it through
/// the methods on `ParsedOffset`.
#[derive(Debug)]
enum ParsedOffsetKind {
    /// The zulu offset, corresponding to UTC in a context where the offset for
    /// civil time is unknown or unavailable. This is what a `Z` or `z`
    /// designator parses to.
    Zulu,
    /// The specific numeric offset, e.g., what `-05:30` parses to.
    Numeric(Numeric),
}
203
/// A numeric representation of a UTC offset.
///
/// The components are stored as they were written in the input, with the sign
/// kept separately from the (non-negative) magnitude of each component. Use
/// `Numeric::to_offset` to collapse this into a single `Offset`.
struct Numeric {
    /// The sign that was parsed from the numeric UTC offset. This is always
    /// either `1` or `-1`, never `0`.
    sign: t::Sign,
    /// The hours component. This is non-optional because every UTC offset must
    /// have at least hours.
    hours: ParsedOffsetHours,
    /// The minutes component. This is `None` when only hours were given.
    minutes: Option<ParsedOffsetMinutes>,
    /// The seconds component. This is only possible when subminute resolution
    /// is enabled.
    seconds: Option<ParsedOffsetSeconds>,
    /// The nanoseconds fractional component. This is only possible when
    /// subminute resolution is enabled.
    nanoseconds: Option<t::SubsecNanosecond>,
}
221
222impl Numeric {
223    /// Convert a parsed numeric offset into a Jiff offset.
224    ///
225    /// This does rounding based on the fractional nanosecond part. As a
226    /// result, if the parsed value would be rounded to a value not in bounds
227    /// for a Jiff offset, this returns an error.
228    fn to_offset(&self) -> Result<Offset, Error> {
229        let mut seconds = t::SpanZoneOffset::rfrom(C(3_600) * self.hours);
230        if let Some(part_minutes) = self.minutes {
231            seconds += C(60) * part_minutes;
232        }
233        if let Some(part_seconds) = self.seconds {
234            seconds += part_seconds;
235        }
236        if let Some(part_nanoseconds) = self.nanoseconds {
237            if part_nanoseconds >= C(500_000_000) {
238                seconds = seconds
239                    .try_checked_add("offset-seconds", C(1))
240                    .with_context(|| {
241                        err!(
242                            "due to precision loss, UTC offset '{}' is \
243                             rounded to a value that is out of bounds",
244                            self,
245                        )
246                    })?;
247            }
248        }
249        Ok(Offset::from_seconds_ranged(seconds * self.sign))
250    }
251}
252
253// This impl is just used for error messages when converting a `Numeric` to an
254// `Offset` fails.
255impl core::fmt::Display for Numeric {
256    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
257        if self.sign == C(-1) {
258            write!(f, "-")?;
259        } else {
260            write!(f, "+")?;
261        }
262        write!(f, "{:02}", self.hours)?;
263        if let Some(minutes) = self.minutes {
264            write!(f, ":{:02}", minutes)?;
265        }
266        if let Some(seconds) = self.seconds {
267            write!(f, ":{:02}", seconds)?;
268        }
269        if let Some(nanos) = self.nanoseconds {
270            static FMT: FractionalFormatter = FractionalFormatter::new();
271            write!(f, ".{}", FMT.format(i64::from(nanos)).as_str())?;
272        }
273        Ok(())
274    }
275}
276
277// We give a succinct Debug impl (identical to Display) to make snapshot
278// testing a bit nicer.
279impl core::fmt::Debug for Numeric {
280    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
281        core::fmt::Display::fmt(self, f)
282    }
283}
284
/// A parser for UTC offsets.
///
/// At time of writing, the typical configuration for offset parsing is to
/// enable Zulu support and subminute precision. But when parsing zoned
/// datetimes, and specifically, offsets within time zone annotations (the RFC
/// 9557 extension to RFC 3339), then neither zulu nor subminute support are
/// enabled.
///
/// N.B. I'm not actually totally clear on why zulu/subminute aren't allowed in
/// time zone annotations, but that's what Temporal's grammar seems to dictate.
/// One might argue that this is what RFCs 3339 and 9557 require, but the
/// Temporal grammar is already recognizing a superset anyway.
#[derive(Debug)]
pub(crate) struct Parser {
    /// Whether the `Z`/`z` designator is accepted. See `Parser::zulu`.
    zulu: bool,
    /// Whether a minute component must be present. See
    /// `Parser::require_minute`.
    require_minute: bool,
    /// Whether a second component must be present. Only consulted when
    /// `subminute` is enabled. See `Parser::require_second`.
    require_second: bool,
    /// Whether components beyond minutes may be parsed. See
    /// `Parser::subminute`.
    subminute: bool,
    /// Whether a fractional second may be parsed. Only consulted when
    /// `subminute` is enabled. See `Parser::subsecond`.
    subsecond: bool,
    /// The policy for `:` separators between components. See
    /// `Parser::colon`.
    colon: Colon,
}
306
307impl Parser {
308    /// Create a new UTC offset parser with the default configuration.
309    pub(crate) const fn new() -> Parser {
310        Parser {
311            zulu: true,
312            require_minute: false,
313            require_second: false,
314            subminute: true,
315            subsecond: true,
316            colon: Colon::Optional,
317        }
318    }
319
320    /// When enabled, the `z` and `Z` designators are recognized as a "zulu"
321    /// indicator for UTC when the civil time offset is unknown or unavailable.
322    ///
323    /// When disabled, neither `z` nor `Z` will be recognized and a parser
324    /// error will occur if one is found.
325    ///
326    /// This is enabled by default.
327    pub(crate) const fn zulu(self, yes: bool) -> Parser {
328        Parser { zulu: yes, ..self }
329    }
330
331    /// When enabled, the minute component of a time zone offset is required.
332    /// If no minutes are found, then an error is returned.
333    ///
334    /// This is disabled by default.
335    pub(crate) const fn require_minute(self, yes: bool) -> Parser {
336        Parser { require_minute: yes, ..self }
337    }
338
339    /// When enabled, the second component of a time zone offset is required.
340    /// If no seconds (or minutes) are found, then an error is returned.
341    ///
342    /// When `subminute` is disabled, this setting has no effect.
343    ///
344    /// This is disabled by default.
345    pub(crate) const fn require_second(self, yes: bool) -> Parser {
346        Parser { require_second: yes, ..self }
347    }
348
349    /// When enabled, offsets with precision greater than integral minutes
350    /// are supported. Specifically, when enabled, nanosecond precision is
351    /// supported.
352    ///
353    /// When disabled, offsets must be integral minutes. And the `subsecond`
354    /// option is ignored.
355    pub(crate) const fn subminute(self, yes: bool) -> Parser {
356        Parser { subminute: yes, ..self }
357    }
358
359    /// When enabled, offsets with precision greater than integral seconds
360    /// are supported. Specifically, when enabled, nanosecond precision is
361    /// supported. Note though that when a fractional second is found, it is
362    /// used to round to the nearest second. (Jiff's `Offset` type only has
363    /// second resolution.)
364    ///
365    /// When disabled, offsets must be integral seconds (or integrate minutes
366    /// if the `subminute` option is disabled as well).
367    ///
368    /// This is ignored if `subminute` is disabled.
369    pub(crate) const fn subsecond(self, yes: bool) -> Parser {
370        Parser { subsecond: yes, ..self }
371    }
372
373    /// Sets how to handle parsing of colons in a time zone offset.
374    ///
375    /// This is set to `Colon::Optional` by default.
376    pub(crate) const fn colon(self, colon: Colon) -> Parser {
377        Parser { colon, ..self }
378    }
379
380    /// Parse an offset from the beginning of `input`.
381    ///
382    /// If no offset could be found or it was otherwise invalid, then an error
383    /// is returned.
384    ///
385    /// In general, parsing stops when, after all required components are seen,
386    /// an optional component is not present (either because of the end of the
387    /// input or because of a character that cannot possibly begin said optional
388    /// component). This does mean that there are some corner cases where error
389    /// messages will not be as good as they possibly can be. But there are
390    /// two exceptions here:
391    ///
392    /// 1. When Zulu support is disabled and a `Z` or `z` are found, then an
393    /// error is returned indicating that `Z` was recognized but specifically
394    /// not allowed.
395    /// 2. When subminute precision is disabled and a `:` is found after the
396    /// minutes component, then an error is returned indicating that the
397    /// seconds component was recognized but specifically not allowed.
398    ///
399    /// Otherwise, for example, if `input` is `-0512:34`, then the `-0512`
400    /// will be parsed as `-5 hours, 12 minutes` with an offset of `5`.
401    /// Presumably, whatever higher level parser is invoking this routine will
402    /// then see an unexpected `:`. But it's likely that a better error message
403    /// would call out the fact that mixed basic and extended formats (from
404    /// ISO 8601) aren't allowed, and that the offset needs to be written as
405    /// either `-05:12:34` or `-051234`. But... these are odd corner cases, so
406    /// we abide them.
407    pub(crate) fn parse<'i>(
408        &self,
409        mut input: &'i [u8],
410    ) -> Result<Parsed<'i, ParsedOffset>, Error> {
411        if input.is_empty() {
412            return Err(err!("expected UTC offset, but found end of input"));
413        }
414
415        if input[0] == b'Z' || input[0] == b'z' {
416            if !self.zulu {
417                return Err(err!(
418                    "found {z:?} in {original:?} where a numeric UTC offset \
419                     was expected (this context does not permit \
420                     the Zulu offset)",
421                    z = escape::Byte(input[0]),
422                    original = escape::Bytes(input),
423                ));
424            }
425            input = &input[1..];
426            let value = ParsedOffset { kind: ParsedOffsetKind::Zulu };
427            return Ok(Parsed { value, input });
428        }
429        let Parsed { value: numeric, input } = self.parse_numeric(input)?;
430        let value = ParsedOffset { kind: ParsedOffsetKind::Numeric(numeric) };
431        Ok(Parsed { value, input })
432    }
433
434    /// Like `parse`, but will return `None` if `input` cannot possibly start
435    /// with an offset.
436    ///
437    /// Basically, if `input` is empty, or is not one of `z`, `Z`, `+` or `-`
438    /// then this returns `None`.
439    #[cfg_attr(feature = "perf-inline", inline(always))]
440    pub(crate) fn parse_optional<'i>(
441        &self,
442        input: &'i [u8],
443    ) -> Result<Parsed<'i, Option<ParsedOffset>>, Error> {
444        let Some(first) = input.first().copied() else {
445            return Ok(Parsed { value: None, input });
446        };
447        if !matches!(first, b'z' | b'Z' | b'+' | b'-') {
448            return Ok(Parsed { value: None, input });
449        }
450        let Parsed { value, input } = self.parse(input)?;
451        Ok(Parsed { value: Some(value), input })
452    }
453
454    /// Parses a numeric offset from the beginning of `input`.
455    ///
456    /// The beginning of the input is expected to start with a `+` or a `-`.
457    /// Any other case (including an empty string) will result in an error.
458    #[cfg_attr(feature = "perf-inline", inline(always))]
459    fn parse_numeric<'i>(
460        &self,
461        input: &'i [u8],
462    ) -> Result<Parsed<'i, Numeric>, Error> {
463        let original = escape::Bytes(input);
464
465        // Parse sign component.
466        let Parsed { value: sign, input } =
467            self.parse_sign(input).with_context(|| {
468                err!("failed to parse sign in UTC numeric offset {original:?}")
469            })?;
470
471        // Parse hours component.
472        let Parsed { value: hours, input } =
473            self.parse_hours(input).with_context(|| {
474                err!(
475                    "failed to parse hours in UTC numeric offset {original:?}"
476                )
477            })?;
478        let extended = match self.colon {
479            Colon::Optional => input.starts_with(b":"),
480            Colon::Required => {
481                if !input.is_empty() && !input.starts_with(b":") {
482                    return Err(err!(
483                        "parsed hour component of time zone offset from \
484                         {original:?}, but could not find required colon \
485                         separator",
486                    ));
487                }
488                true
489            }
490            Colon::Absent => {
491                if !input.is_empty() && input.starts_with(b":") {
492                    return Err(err!(
493                        "parsed hour component of time zone offset from \
494                         {original:?}, but found colon after hours which \
495                         is not allowed",
496                    ));
497                }
498                false
499            }
500        };
501
502        // Start building up our numeric offset value.
503        let mut numeric = Numeric {
504            sign,
505            hours,
506            minutes: None,
507            seconds: None,
508            nanoseconds: None,
509        };
510
511        // Parse optional separator after hours.
512        let Parsed { value: has_minutes, input } =
513            self.parse_separator(input, extended).with_context(|| {
514                err!(
515                    "failed to parse separator after hours in \
516                     UTC numeric offset {original:?}"
517                )
518            })?;
519        if !has_minutes {
520            if self.require_minute || (self.subminute && self.require_second) {
521                return Err(err!(
522                    "parsed hour component of time zone offset from \
523                     {original:?}, but could not find required minute \
524                     component",
525                ));
526            }
527            return Ok(Parsed { value: numeric, input });
528        }
529
530        // Parse minutes component.
531        let Parsed { value: minutes, input } =
532            self.parse_minutes(input).with_context(|| {
533                err!(
534                    "failed to parse minutes in UTC numeric offset \
535                     {original:?}"
536                )
537            })?;
538        numeric.minutes = Some(minutes);
539
540        // If subminute resolution is not supported, then we're done here.
541        if !self.subminute {
542            // While we generally try to "stop" parsing once we're done
543            // seeing things we expect, in this case, if we see a colon, it
544            // almost certainly indicates that someone has tried to provide
545            // more precision than is supported. So we return an error here.
546            // If this winds up being problematic, we can make this error
547            // configurable or remove it altogether (unfortunate).
548            if input.get(0).map_or(false, |&b| b == b':') {
549                return Err(err!(
550                    "subminute precision for UTC numeric offset {original:?} \
551                     is not enabled in this context (must provide only \
552                     integral minutes)",
553                ));
554            }
555            return Ok(Parsed { value: numeric, input });
556        }
557
558        // Parse optional separator after minutes.
559        let Parsed { value: has_seconds, input } =
560            self.parse_separator(input, extended).with_context(|| {
561                err!(
562                    "failed to parse separator after minutes in \
563                     UTC numeric offset {original:?}"
564                )
565            })?;
566        if !has_seconds {
567            if self.require_second {
568                return Err(err!(
569                    "parsed hour and minute components of time zone offset \
570                     from {original:?}, but could not find required second \
571                     component",
572                ));
573            }
574            return Ok(Parsed { value: numeric, input });
575        }
576
577        // Parse seconds component.
578        let Parsed { value: seconds, input } =
579            self.parse_seconds(input).with_context(|| {
580                err!(
581                    "failed to parse seconds in UTC numeric offset \
582                     {original:?}"
583                )
584            })?;
585        numeric.seconds = Some(seconds);
586
587        // If subsecond resolution is not supported, then we're done here.
588        if !self.subsecond {
589            if input.get(0).map_or(false, |&b| b == b'.' || b == b',') {
590                return Err(err!(
591                    "subsecond precision for UTC numeric offset {original:?} \
592                     is not enabled in this context (must provide only \
593                     integral minutes or seconds)",
594                ));
595            }
596            return Ok(Parsed { value: numeric, input });
597        }
598
599        // Parse an optional fractional component.
600        let Parsed { value: nanoseconds, input } =
601            parse_temporal_fraction(input).with_context(|| {
602                err!(
603                    "failed to parse fractional nanoseconds in \
604                     UTC numeric offset {original:?}",
605                )
606            })?;
607        numeric.nanoseconds = nanoseconds;
608        Ok(Parsed { value: numeric, input })
609    }
610
611    #[cfg_attr(feature = "perf-inline", inline(always))]
612    fn parse_sign<'i>(
613        &self,
614        input: &'i [u8],
615    ) -> Result<Parsed<'i, t::Sign>, Error> {
616        let sign = input.get(0).copied().ok_or_else(|| {
617            err!("expected UTC numeric offset, but found end of input")
618        })?;
619        let sign = if sign == b'+' {
620            t::Sign::N::<1>()
621        } else if sign == b'-' {
622            t::Sign::N::<-1>()
623        } else {
624            return Err(err!(
625                "expected '+' or '-' sign at start of UTC numeric offset, \
626                 but found {found:?} instead",
627                found = escape::Byte(sign),
628            ));
629        };
630        Ok(Parsed { value: sign, input: &input[1..] })
631    }
632
633    #[cfg_attr(feature = "perf-inline", inline(always))]
634    fn parse_hours<'i>(
635        &self,
636        input: &'i [u8],
637    ) -> Result<Parsed<'i, ParsedOffsetHours>, Error> {
638        let (hours, input) = parse::split(input, 2).ok_or_else(|| {
639            err!("expected two digit hour after sign, but found end of input",)
640        })?;
641        let hours = parse::i64(hours).with_context(|| {
642            err!(
643                "failed to parse {hours:?} as hours (a two digit integer)",
644                hours = escape::Bytes(hours),
645            )
646        })?;
647        // Note that we support a slightly bigger range of offsets than
648        // Temporal. Temporal seems to support only up to 23 hours, but
649        // we go up to 25 hours. This is done to support POSIX time zone
650        // strings, which also require 25 hours (plus the maximal minute/second
651        // components).
652        let hours = ParsedOffsetHours::try_new("hours", hours)
653            .context("offset hours are not valid")?;
654        Ok(Parsed { value: hours, input })
655    }
656
657    #[cfg_attr(feature = "perf-inline", inline(always))]
658    fn parse_minutes<'i>(
659        &self,
660        input: &'i [u8],
661    ) -> Result<Parsed<'i, ParsedOffsetMinutes>, Error> {
662        let (minutes, input) = parse::split(input, 2).ok_or_else(|| {
663            err!(
664                "expected two digit minute after hours, \
665                 but found end of input",
666            )
667        })?;
668        let minutes = parse::i64(minutes).with_context(|| {
669            err!(
670                "failed to parse {minutes:?} as minutes (a two digit integer)",
671                minutes = escape::Bytes(minutes),
672            )
673        })?;
674        let minutes = ParsedOffsetMinutes::try_new("minutes", minutes)
675            .context("minutes are not valid")?;
676        Ok(Parsed { value: minutes, input })
677    }
678
679    #[cfg_attr(feature = "perf-inline", inline(always))]
680    fn parse_seconds<'i>(
681        &self,
682        input: &'i [u8],
683    ) -> Result<Parsed<'i, ParsedOffsetSeconds>, Error> {
684        let (seconds, input) = parse::split(input, 2).ok_or_else(|| {
685            err!(
686                "expected two digit second after hours, \
687                 but found end of input",
688            )
689        })?;
690        let seconds = parse::i64(seconds).with_context(|| {
691            err!(
692                "failed to parse {seconds:?} as seconds (a two digit integer)",
693                seconds = escape::Bytes(seconds),
694            )
695        })?;
696        let seconds = ParsedOffsetSeconds::try_new("seconds", seconds)
697            .context("time zone offset seconds are not valid")?;
698        Ok(Parsed { value: seconds, input })
699    }
700
701    /// Parses a separator between hours/minutes or minutes/seconds. When
702    /// `true` is returned, we expect to parse the next component. When `false`
703    /// is returned, then no separator was found and there is no expectation of
704    /// finding another component.
705    ///
706    /// When in extended mode, true is returned if and only if a separator is
707    /// found.
708    ///
709    /// When in basic mode (not extended), then a subsequent component is only
710    /// expected when `input` begins with two ASCII digits.
711    #[cfg_attr(feature = "perf-inline", inline(always))]
712    fn parse_separator<'i>(
713        &self,
714        mut input: &'i [u8],
715        extended: bool,
716    ) -> Result<Parsed<'i, bool>, Error> {
717        if !extended {
718            let expected =
719                input.len() >= 2 && input[..2].iter().all(u8::is_ascii_digit);
720            return Ok(Parsed { value: expected, input });
721        }
722        let is_separator = input.get(0).map_or(false, |&b| b == b':');
723        if is_separator {
724            input = &input[1..];
725        }
726        Ok(Parsed { value: is_separator, input })
727    }
728}
729
/// How to handle parsing of colons in a time zone offset.
///
/// This is set on a `Parser` via `Parser::colon`. The policy is applied to
/// the byte following the hours component.
#[derive(Debug)]
pub(crate) enum Colon {
    /// Colons may be present or not. When present, colons must be used
    /// consistently. For example, `+05:3015` and `-0530:15` are not allowed.
    Optional,
    /// Colons must be present. A non-colon byte directly after the hours
    /// component is an error.
    Required,
    /// Colons must be absent. A colon directly after the hours component is
    /// an error.
    Absent,
}
741
#[cfg(test)]
mod tests {
    use crate::util::rangeint::RInto;

    use super::*;

    // Both the upper and lower case Zulu designators are accepted, and each
    // one consumes the whole input.
    #[test]
    fn ok_zulu() {
        let parse = |input| Parser::new().parse(input).unwrap();

        let upper = parse(b"Z");
        insta::assert_debug_snapshot!(upper, @r###"
        Parsed {
            value: ParsedOffset {
                kind: Zulu,
            },
            input: "",
        }
        "###);

        let lower = parse(b"z");
        insta::assert_debug_snapshot!(lower, @r###"
        Parsed {
            value: ParsedOffset {
                kind: Zulu,
            },
            input: "",
        }
        "###);
    }

    // A numeric offset that goes through the general `parse` entry point is
    // reported with the `Numeric` kind.
    #[test]
    fn ok_numeric() {
        let parsed = Parser::new().parse(b"-05").unwrap();
        insta::assert_debug_snapshot!(parsed, @r###"
        Parsed {
            value: ParsedOffset {
                kind: Numeric(
                    -05,
                ),
            },
            input: "",
        }
        "###);
    }

    // Successful parses where the numeric offset spans the entire input, so
    // nothing is left over afterwards.
    #[test]
    fn ok_numeric_complete() {
        let numeric = |input| Parser::new().parse_numeric(input).unwrap();

        // Hours only, with either sign.
        insta::assert_debug_snapshot!(numeric(b"-05"), @r###"
        Parsed {
            value: -05,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+05"), @r###"
        Parsed {
            value: +05,
            input: "",
        }
        "###);

        // Hours and minutes, with and without a colon.
        insta::assert_debug_snapshot!(numeric(b"+25:59"), @r###"
        Parsed {
            value: +25:59,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+2559"), @r###"
        Parsed {
            value: +25:59,
            input: "",
        }
        "###);

        // Hours, minutes and seconds, with and without colons.
        insta::assert_debug_snapshot!(numeric(b"+25:59:59"), @r###"
        Parsed {
            value: +25:59:59,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+255959"), @r###"
        Parsed {
            value: +25:59:59,
            input: "",
        }
        "###);

        // Fractional seconds: both '.' and ',' work as the decimal
        // separator, in both the colon and colon-free forms.
        insta::assert_debug_snapshot!(numeric(b"+25:59:59.999"), @r###"
        Parsed {
            value: +25:59:59.999,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+25:59:59,999"), @r###"
        Parsed {
            value: +25:59:59.999,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+255959.999"), @r###"
        Parsed {
            value: +25:59:59.999,
            input: "",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"+255959,999"), @r###"
        Parsed {
            value: +25:59:59.999,
            input: "",
        }
        "###);

        // Nine fractional digits.
        insta::assert_debug_snapshot!(numeric(b"+25:59:59.999999999"), @r###"
        Parsed {
            value: +25:59:59.999999999,
            input: "",
        }
        "###);
    }

    // Successful parses where the numeric offset stops short of the end of
    // the input. Whatever follows the offset is handed back as the remaining
    // input.
    #[test]
    fn ok_numeric_incomplete() {
        let numeric = |input| Parser::new().parse_numeric(input).unwrap();

        insta::assert_debug_snapshot!(numeric(b"-05a"), @r###"
        Parsed {
            value: -05,
            input: "a",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-05:12a"), @r###"
        Parsed {
            value: -05:12,
            input: "a",
        }
        "###);
        // A decimal separator directly after the minutes is not consumed.
        insta::assert_debug_snapshot!(numeric(b"-05:12."), @r###"
        Parsed {
            value: -05:12,
            input: ".",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-05:12,"), @r###"
        Parsed {
            value: -05:12,
            input: ",",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-0512a"), @r###"
        Parsed {
            value: -05:12,
            input: "a",
        }
        "###);
        // A colon after a colon-free offset is not consumed.
        insta::assert_debug_snapshot!(numeric(b"-0512:"), @r###"
        Parsed {
            value: -05:12,
            input: ":",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-05:12:34a"), @r###"
        Parsed {
            value: -05:12:34,
            input: "a",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-05:12:34.9a"), @r###"
        Parsed {
            value: -05:12:34.9,
            input: "a",
        }
        "###);
        // A second decimal separator after the fraction is not consumed.
        insta::assert_debug_snapshot!(numeric(b"-05:12:34.9."), @r###"
        Parsed {
            value: -05:12:34.9,
            input: ".",
        }
        "###);
        insta::assert_debug_snapshot!(numeric(b"-05:12:34.9,"), @r###"
        Parsed {
            value: -05:12:34.9,
            input: ",",
        }
        "###);
    }

    // Empty input is an error. The parser is written on the premise that
    // whoever calls it expects a numeric UTC offset at the current position.
    #[test]
    fn err_numeric_empty() {
        let err = Parser::new().parse_numeric(b"").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse sign in UTC numeric offset "": expected UTC numeric offset, but found end of input"###,
        );
    }

    // The first byte of a numeric offset has to be a '+' or a '-'.
    #[test]
    fn err_numeric_notsign() {
        let err = Parser::new().parse_numeric(b"*").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse sign in UTC numeric offset "*": expected '+' or '-' sign at start of UTC numeric offset, but found "*" instead"###,
        );
    }

    // Fewer than two bytes after the sign cannot hold the hours component.
    #[test]
    fn err_numeric_hours_too_short() {
        let err = Parser::new().parse_numeric(b"+a").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse hours in UTC numeric offset "+a": expected two digit hour after sign, but found end of input"###,
        );
    }

    // The two bytes making up the hours component must be ASCII digits.
    #[test]
    fn err_numeric_hours_invalid_digits() {
        let err = Parser::new().parse_numeric(b"+ab").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse hours in UTC numeric offset "+ab": failed to parse "ab" as hours (a two digit integer): invalid digit, expected 0-9 but got a"###,
        );
    }

    // An hours value beyond the supported range is rejected.
    #[test]
    fn err_numeric_hours_out_of_range() {
        let err = Parser::new().parse_numeric(b"-26").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse hours in UTC numeric offset "-26": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
        );
    }

    // Fewer than two bytes after the hours separator cannot hold the minutes
    // component.
    #[test]
    fn err_numeric_minutes_too_short() {
        let err = Parser::new().parse_numeric(b"+05:a").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse minutes in UTC numeric offset "+05:a": expected two digit minute after hours, but found end of input"###,
        );
    }

    // The two bytes making up the minutes component must be ASCII digits.
    #[test]
    fn err_numeric_minutes_invalid_digits() {
        let err = Parser::new().parse_numeric(b"+05:ab").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse minutes in UTC numeric offset "+05:ab": failed to parse "ab" as minutes (a two digit integer): invalid digit, expected 0-9 but got a"###,
        );
    }

    // A minutes value beyond the supported range is rejected.
    #[test]
    fn err_numeric_minutes_out_of_range() {
        let err = Parser::new().parse_numeric(b"-05:60").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse minutes in UTC numeric offset "-05:60": minutes are not valid: parameter 'minutes' with value 60 is not in the required range of 0..=59"###,
        );
    }

    // Fewer than two bytes after the minutes separator cannot hold the
    // seconds component.
    //
    // NOTE(review): the message says "after hours" even though the seconds
    // follow the minutes. The snapshot mirrors the parser's current wording.
    #[test]
    fn err_numeric_seconds_too_short() {
        let err = Parser::new().parse_numeric(b"+05:30:a").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse seconds in UTC numeric offset "+05:30:a": expected two digit second after hours, but found end of input"###,
        );
    }

    // The two bytes making up the seconds component must be ASCII digits.
    #[test]
    fn err_numeric_seconds_invalid_digits() {
        let err = Parser::new().parse_numeric(b"+05:30:ab").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse seconds in UTC numeric offset "+05:30:ab": failed to parse "ab" as seconds (a two digit integer): invalid digit, expected 0-9 but got a"###,
        );
    }

    // A seconds value beyond the supported range is rejected.
    #[test]
    fn err_numeric_seconds_out_of_range() {
        let err = Parser::new().parse_numeric(b"-05:30:60").unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"failed to parse seconds in UTC numeric offset "-05:30:60": time zone offset seconds are not valid: parameter 'seconds' with value 60 is not in the required range of 0..=59"###,
        );
    }

    // When a decimal separator follows the seconds, at least one decimal
    // digit must come after it.
    #[test]
    fn err_numeric_fraction_non_empty() {
        let fail = |input| Parser::new().parse_numeric(input).unwrap_err();

        // The separator is the last byte of the input.
        insta::assert_snapshot!(
            fail(b"-05:30:44."),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44.": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );
        insta::assert_snapshot!(
            fail(b"-05:30:44,"),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44,": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );

        // The separator is followed by a byte that is not a digit.
        insta::assert_snapshot!(
            fail(b"-05:30:44.a"),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44.a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );
        insta::assert_snapshot!(
            fail(b"-05:30:44,a"),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44,a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );

        // The same failure in the basic (colon-free) format.
        insta::assert_snapshot!(
            fail(b"-053044.a"),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-053044.a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );
        insta::assert_snapshot!(
            fail(b"-053044,a"),
            @r###"failed to parse fractional nanoseconds in UTC numeric offset "-053044,a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
        );
    }

    // A special case where the input clearly asks for sub-minute precision,
    // but sub-minute precision has been forcefully disabled. The dedicated
    // error is meant to make what is likely a subtle failure mode more
    // explicit.
    #[test]
    fn err_numeric_subminute_disabled_but_desired() {
        let err = Parser::new()
            .subminute(false)
            .parse_numeric(b"-05:59:32")
            .unwrap_err();
        insta::assert_snapshot!(
            err,
            @r###"subminute precision for UTC numeric offset "-05:59:32" is not enabled in this context (must provide only integral minutes)"###,
        );
    }

    // Another special case: Zulu parsing has been explicitly disabled, but
    // the input is a Zulu designator.
    #[test]
    fn err_zulu_disabled_but_desired() {
        let fail = |input| Parser::new().zulu(false).parse(input).unwrap_err();

        insta::assert_snapshot!(
            fail(b"Z"),
            @r###"found "Z" in "Z" where a numeric UTC offset was expected (this context does not permit the Zulu offset)"###,
        );
        insta::assert_snapshot!(
            fail(b"z"),
            @r###"found "z" in "z" where a numeric UTC offset was expected (this context does not permit the Zulu offset)"###,
        );
    }

    // Once a `Numeric` has been parsed, it is almost possible to assume that
    // it can be infallibly converted to an `Offset`. The one case where this
    // isn't true is when there is a fractional nanosecond part along with
    // maximal hour, minute and second components: the fraction is rounded,
    // and rounding away from zero lands outside the range of `Offset`.
    #[test]
    fn err_numeric_too_big_for_offset() {
        // Just under half a second: the conversion still yields the maximum.
        let below_half = Numeric {
            sign: t::Sign::MAX_SELF,
            hours: ParsedOffsetHours::MAX_SELF,
            minutes: Some(ParsedOffsetMinutes::MAX_SELF),
            seconds: Some(ParsedOffsetSeconds::MAX_SELF),
            nanoseconds: Some(C(499_999_999).rinto()),
        };
        let offset = below_half.to_offset().unwrap();
        assert_eq!(offset, Offset::MAX);

        // Exactly half a second: the conversion fails.
        let at_half = Numeric {
            sign: t::Sign::MAX_SELF,
            hours: ParsedOffsetHours::MAX_SELF,
            minutes: Some(ParsedOffsetMinutes::MAX_SELF),
            seconds: Some(ParsedOffsetSeconds::MAX_SELF),
            nanoseconds: Some(C(500_000_000).rinto()),
        };
        let err = at_half.to_offset().unwrap_err();
        insta::assert_snapshot!(
            err,
            @"due to precision loss, UTC offset '+25:59:59.5' is rounded to a value that is out of bounds: parameter 'offset-seconds' with value 1 is not in the required range of -93599..=93599",
        );
    }

    // Same as `err_numeric_too_big_for_offset`, but at the minimum boundary.
    #[test]
    fn err_numeric_too_small_for_offset() {
        // Just under half a second: the conversion still yields the minimum.
        let below_half = Numeric {
            sign: t::Sign::MIN_SELF,
            hours: ParsedOffsetHours::MAX_SELF,
            minutes: Some(ParsedOffsetMinutes::MAX_SELF),
            seconds: Some(ParsedOffsetSeconds::MAX_SELF),
            nanoseconds: Some(C(499_999_999).rinto()),
        };
        let offset = below_half.to_offset().unwrap();
        assert_eq!(offset, Offset::MIN);

        // Exactly half a second: the conversion fails.
        let at_half = Numeric {
            sign: t::Sign::MIN_SELF,
            hours: ParsedOffsetHours::MAX_SELF,
            minutes: Some(ParsedOffsetMinutes::MAX_SELF),
            seconds: Some(ParsedOffsetSeconds::MAX_SELF),
            nanoseconds: Some(C(500_000_000).rinto()),
        };
        let err = at_half.to_offset().unwrap_err();
        insta::assert_snapshot!(
            err,
            @"due to precision loss, UTC offset '-25:59:59.5' is rounded to a value that is out of bounds: parameter 'offset-seconds' with value 1 is not in the required range of -93599..=93599",
        );
    }
}