jiff/tz/
tzif.rs

1/*!
2This module provides support for TZif binary files from the [Time Zone
3Database].
4
5These binary files are the ones commonly found in Unix distributions in the
6`/usr/share/zoneinfo` directory.
7
8[Time Zone Database]: https://www.iana.org/time-zones
9*/
10
11use core::ops::Range;
12
13#[cfg(feature = "alloc")]
14use alloc::{string::String, vec::Vec};
15
16use crate::{
17    civil::DateTime,
18    error::Error,
19    shared::{self, util::array_str::Abbreviation},
20    timestamp::Timestamp,
21    tz::{
22        posix::PosixTimeZone, timezone::TimeZoneAbbreviation, AmbiguousOffset,
23        Dst, Offset, TimeZoneOffsetInfo, TimeZoneTransition,
24    },
25};
26
/// The owned variant of `Tzif`.
///
/// Each type parameter of `Tzif` is instantiated with an owned type: the
/// name is a `String`, the abbreviation parameter is `Abbreviation` and
/// every slice-like parameter is a `Vec`. Because `String` and `Vec` come
/// from `alloc`, this alias only exists when the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
pub(crate) type TzifOwned = Tzif<
    // STR
    String,
    // ABBREV
    Abbreviation,
    // TYPES
    Vec<shared::TzifLocalTimeType>,
    // TIMESTAMPS
    Vec<i64>,
    // STARTS
    Vec<shared::TzifDateTime>,
    // ENDS
    Vec<shared::TzifDateTime>,
    // INFOS
    Vec<shared::TzifTransitionInfo>,
>;
38
/// The static variant of `Tzif`.
///
/// Each type parameter of `Tzif` is instantiated with a `&'static` borrow
/// (`&'static str` or a `&'static` slice). Unlike `TzifOwned`, this alias is
/// not gated on the `alloc` feature.
pub(crate) type TzifStatic = Tzif<
    // STR
    &'static str,
    // ABBREV
    &'static str,
    // TYPES
    &'static [shared::TzifLocalTimeType],
    // TIMESTAMPS
    &'static [i64],
    // STARTS
    &'static [shared::TzifDateTime],
    // ENDS
    &'static [shared::TzifDateTime],
    // INFOS
    &'static [shared::TzifTransitionInfo],
>;
49
/// A time zone based on IANA TZif formatted data.
///
/// TZif is a binary format described by RFC 8536. Its typical structure is to
/// define a single time zone per file in the `/usr/share/zoneinfo` directory
/// on Unix systems. The name of a time zone is its file path with the
/// `/usr/share/zoneinfo/` prefix stripped from it.
///
/// This type doesn't provide any facilities for dealing with files on disk
/// or the `/usr/share/zoneinfo` directory. This type is just for parsing the
/// contents of TZif formatted data in memory, and turning it into a data type
/// that can be used as a time zone.
///
/// The type parameters select the storage used for each piece of data. See
/// the `TzifOwned` and `TzifStatic` aliases for the two instantiations defined
/// in this module.
#[derive(Debug)]
// not part of Jiff's public API
#[doc(hidden)]
// This ensures the alignment of this type is always *at least* 8 bytes. This
// is required for the pointer tagging inside of `TimeZone` to be sound. At
// time of writing (2024-02-24), this explicit `repr` isn't required on 64-bit
// systems since the type definition is such that it will have an alignment of
// at least 8 bytes anyway. But this *is* required for 32-bit systems, where
// the type definition at present only has an alignment of 4 bytes.
#[repr(align(8))]
pub struct Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> {
    /// The TZif data in its `shared` representation. This module reads the
    /// name, version, checksum, designations, local time types and
    /// transition tables from it.
    inner: shared::Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>,
    /// The POSIX time zone for this TZif data, if present.
    ///
    /// Note that this is also present on `shared::Tzif`, but uses the
    /// `shared::PosixTimeZone` type, which isn't quite what we want here.
    ///
    /// For now we just duplicate it, which is slightly unfortunate. But this
    /// is small and not a huge deal. Ideally we can clean this up later.
    posix_tz: Option<PosixTimeZone<ABBREV>>,
}
82
83impl TzifStatic {
84    /// Converts from the shared-but-internal API for use in proc macros.
85    ///
86    /// This specifically works in a `const` context. And it requires that
87    /// caller to pass in the parsed `Tzif` in its fixed form along with the
88    /// variable length local time types and transitions. (Technically, the
89    /// TZ identifier and the designations are also variable length despite
90    /// being parsed of `TzifFixed`, but in practice they can be handled just
91    /// fine via `&'static str`.)
92    ///
93    /// Notice that the `types` and `transitions` are *not* from the `shared`
94    /// API, but rather, from the types defined in this module. They have to
95    /// be this way because there's a conversion step that occurs. In practice,
96    /// this sort of thing is embedded as a literal in source code via a proc
97    /// macro. Like this:
98    ///
99    /// ```text
100    /// static TZIF: Tzif<&str, &str, &[LocalTimeType], &[Transition]> =
101    ///     Tzif::from_shared_const(
102    ///         shared::TzifFixed {
103    ///             name: Some("America/New_York"),
104    ///             version: b'3',
105    ///             checksum: 0xDEADBEEF,
106    ///             designations: "ESTEDT",
107    ///             posix_tz: None,
108    ///         },
109    ///         &[
110    ///             shared::TzifLocalTimeType {
111    ///                 offset: -5 * 60 * 60,
112    ///                 is_dst: false,
113    ///                 designation: 0..3,
114    ///                 indicator: shared::TzifIndicator::LocalWall,
115    ///             }.to_jiff(),
116    ///         ],
117    ///         &[
118    ///             shared::TzifTransition {
119    ///                 timestamp: 123456789,
120    ///                 type_index: 0,
121    ///             }.to_jiff(-5, -5),
122    ///         ],
123    ///     );
124    /// ```
125    ///
126    /// Or something like that anyway. The point is, our `static` slices are
127    /// variable length and they need to be the right types. At least, I
128    /// couldn't see a simpler way to arrange this.
129    pub(crate) const fn from_shared_const(
130        sh: shared::TzifStatic,
131    ) -> TzifStatic {
132        let posix_tz = match sh.fixed.posix_tz {
133            None => None,
134            Some(posix_tz) => Some(PosixTimeZone::from_shared_const(posix_tz)),
135        };
136        Tzif { inner: sh, posix_tz }
137    }
138}
139
#[cfg(feature = "alloc")]
impl TzifOwned {
    /// Parses `bytes` as the contents of a TZif formatted file.
    ///
    /// The `name` is attached to the returned `Tzif` value, but is otherwise
    /// not significant.
    ///
    /// # Errors
    ///
    /// An error is returned when the data is not recognized as valid TZif.
    ///
    /// # Untrusted input
    ///
    /// In general, callers may assume that it is safe to pass arbitrary or
    /// even untrusted data to this function and count on it not panicking
    /// or using resources that aren't limited to a small constant factor of
    /// the size of the data itself. That is, bounding the size of `bytes`
    /// reliably bounds the resources used by this function.
    pub(crate) fn parse(
        name: Option<String>,
        bytes: &[u8],
    ) -> Result<Self, Error> {
        shared::TzifOwned::parse(name, bytes)
            .map_err(Error::shared)
            .map(Self::from_shared_owned)
    }

    /// Wraps TZif data from the shared-but-internal API, for use in proc
    /// macros.
    ///
    /// Unlike its `const` counterpart on `TzifStatic`, this is not a `const`
    /// function because the variable length data inside the given value is
    /// owned and lives on the heap.
    pub(crate) fn from_shared_owned(sh: shared::TzifOwned) -> TzifOwned {
        // The shared POSIX time zone is read out of `sh` and converted; `sh`
        // itself is then stored whole.
        let posix_tz =
            sh.fixed.posix_tz.map(PosixTimeZone::from_shared_owned);
        Tzif { inner: sh, posix_tz }
    }
}
177
178impl<
179        STR: AsRef<str>,
180        ABBREV: AsRef<str>,
181        TYPES: AsRef<[shared::TzifLocalTimeType]>,
182        TIMESTAMPS: AsRef<[i64]>,
183        STARTS: AsRef<[shared::TzifDateTime]>,
184        ENDS: AsRef<[shared::TzifDateTime]>,
185        INFOS: AsRef<[shared::TzifTransitionInfo]>,
186    > Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
187{
188    /// Returns the name given to this TZif data in its constructor.
189    pub(crate) fn name(&self) -> Option<&str> {
190        self.inner.fixed.name.as_ref().map(|n| n.as_ref())
191    }
192
193    /// Returns the appropriate time zone offset to use for the given
194    /// timestamp.
195    pub(crate) fn to_offset(&self, timestamp: Timestamp) -> Offset {
196        match self.to_local_time_type(timestamp) {
197            Ok(typ) => Offset::from_seconds_unchecked(typ.offset),
198            Err(tz) => tz.to_offset(timestamp),
199        }
200    }
201
202    /// Returns the appropriate time zone offset to use for the given
203    /// timestamp.
204    ///
205    /// This also includes whether the offset returned should be considered to
206    /// be DST or not, along with the time zone abbreviation (e.g., EST for
207    /// standard time in New York, and EDT for DST in New York).
208    pub(crate) fn to_offset_info(
209        &self,
210        timestamp: Timestamp,
211    ) -> TimeZoneOffsetInfo<'_> {
212        let typ = match self.to_local_time_type(timestamp) {
213            Ok(typ) => typ,
214            Err(tz) => return tz.to_offset_info(timestamp),
215        };
216        let abbreviation =
217            TimeZoneAbbreviation::Borrowed(self.designation(typ));
218        TimeZoneOffsetInfo {
219            offset: Offset::from_seconds_unchecked(typ.offset),
220            dst: Dst::from(typ.is_dst),
221            abbreviation,
222        }
223    }
224
225    /// Returns the local time type for the timestamp given.
226    ///
227    /// If one could not be found, then this implies that the caller should
228    /// use the POSIX time zone returned in the error variant.
229    fn to_local_time_type(
230        &self,
231        timestamp: Timestamp,
232    ) -> Result<&shared::TzifLocalTimeType, &PosixTimeZone<ABBREV>> {
233        let timestamp = timestamp.as_second();
234        // This is guaranteed because we always push at least one transition.
235        // This isn't guaranteed by TZif since it might have 0 transitions,
236        // but we always add a "dummy" first transition with our minimum
237        // `Timestamp` value. TZif doesn't do this because there is no
238        // universal minimum timestamp. (`i64::MIN` is a candidate, but that's
239        // likely to cause overflow in readers that don't do error checking.)
240        //
241        // The result of the dummy transition is that the code below is simpler
242        // with fewer special cases.
243        let timestamps = self.timestamps();
244        assert!(!timestamps.is_empty(), "transitions is non-empty");
245        let index = if timestamp > *timestamps.last().unwrap() {
246            timestamps.len() - 1
247        } else {
248            let search = self.timestamps().binary_search(&timestamp);
249            match search {
250                // Since the first transition is always Timestamp::MIN, it's
251                // impossible for any timestamp to sort before it.
252                Err(0) => {
253                    unreachable!("impossible to come before Timestamp::MIN")
254                }
255                Ok(i) => i,
256                // i points to the position immediately after the matching
257                // timestamp. And since we know that i>0 because of the i==0
258                // check above, we can safely subtract 1.
259                Err(i) => i.checked_sub(1).expect("i is non-zero"),
260            }
261        };
262        // Our index is always in bounds. The only way it couldn't be is if
263        // binary search returns an Err(len) for a time greater than the
264        // maximum transition. But we account for that above by converting
265        // Err(len) to Err(len-1).
266        debug_assert!(index < timestamps.len());
267        // RFC 8536 says: "Local time for timestamps on or after the last
268        // transition is specified by the TZ string in the footer (Section 3.3)
269        // if present and nonempty; otherwise, it is unspecified."
270        //
271        // Subtracting 1 is OK because we know self.transitions is not empty.
272        let index = if index < timestamps.len() - 1 {
273            // This is the typical case in "fat" TZif files: we found a
274            // matching transition.
275            index
276        } else {
277            match self.posix_tz() {
278                // This is the typical case in "slim" TZif files, where the
279                // last transition is, as I understand it, the transition at
280                // which a consistent rule started that a POSIX TZ string can
281                // fully describe. For example, (as of 2024-03-27) the last
282                // transition in the "fat" America/New_York TZif file is
283                // in 2037, where as in the "slim" version it is 2007.
284                //
285                // This is likely why some things break with the "slim"
286                // version: they don't support POSIX TZ strings (or don't
287                // support them correctly).
288                Some(tz) => return Err(tz),
289                // This case is technically unspecified, but I think the
290                // typical thing to do is to just use the last transition.
291                // I'm not 100% sure on this one.
292                None => index,
293            }
294        };
295        Ok(self.local_time_type(index))
296    }
297
298    /// Returns a possibly ambiguous timestamp for the given civil datetime.
299    ///
300    /// The given datetime should correspond to the "wall" clock time of what
301    /// humans use to tell time for this time zone.
302    ///
303    /// Note that "ambiguous timestamp" is represented by the possible
304    /// selection of offsets that could be applied to the given datetime. In
305    /// general, it is only ambiguous around transitions to-and-from DST. The
306    /// ambiguity can arise as a "fold" (when a particular wall clock time is
307    /// repeated) or as a "gap" (when a particular wall clock time is skipped
308    /// entirely).
309    pub(crate) fn to_ambiguous_kind(&self, dt: DateTime) -> AmbiguousOffset {
310        // This implementation very nearly mirrors `to_local_time_type`
311        // above in the beginning: we do a binary search to find transition
312        // applicable for the given datetime. Except, we do it on wall clock
313        // times instead of timestamps. And in particular, each transition
314        // begins with a possibly ambiguous range of wall clock times
315        // corresponding to either a "gap" or "fold" in time.
316        let dtt = shared::TzifDateTime::new(
317            dt.year(),
318            dt.month(),
319            dt.day(),
320            dt.hour(),
321            dt.minute(),
322            dt.second(),
323        );
324        let (starts, ends) = (self.civil_starts(), self.civil_ends());
325        assert!(!starts.is_empty(), "transitions is non-empty");
326        let this_index = match starts.binary_search(&dtt) {
327            Err(0) => unreachable!("impossible to come before DateTime::MIN"),
328            Ok(i) => i,
329            Err(i) => i.checked_sub(1).expect("i is non-zero"),
330        };
331        debug_assert!(this_index < starts.len());
332
333        let this_offset = self.local_time_type(this_index).offset;
334        // This is a little tricky, but we need to check for ambiguous civil
335        // datetimes before possibly using the POSIX TZ string. Namely, a
336        // datetime could be ambiguous with respect to the last transition,
337        // and we should handle that according to the gap/fold determined for
338        // that transition. We cover this case in tests in tz/mod.rs for the
339        // Pacific/Honolulu time zone, whose last transition begins with a gap.
340        match self.transition_kind(this_index) {
341            shared::TzifTransitionKind::Gap if dtt < ends[this_index] => {
342                // A gap/fold can only appear when there exists a previous
343                // transition.
344                let prev_index = this_index.checked_sub(1).unwrap();
345                let prev_offset = self.local_time_type(prev_index).offset;
346                return AmbiguousOffset::Gap {
347                    before: Offset::from_seconds_unchecked(prev_offset),
348                    after: Offset::from_seconds_unchecked(this_offset),
349                };
350            }
351            shared::TzifTransitionKind::Fold if dtt < ends[this_index] => {
352                // A gap/fold can only appear when there exists a previous
353                // transition.
354                let prev_index = this_index.checked_sub(1).unwrap();
355                let prev_offset = self.local_time_type(prev_index).offset;
356                return AmbiguousOffset::Fold {
357                    before: Offset::from_seconds_unchecked(prev_offset),
358                    after: Offset::from_seconds_unchecked(this_offset),
359                };
360            }
361            _ => {}
362        }
363        // The datetime given is not ambiguous with respect to any of the
364        // transitions in the TZif data. But, if we matched at or after the
365        // last transition, then we need to use the POSIX TZ string (which
366        // could still return an ambiguous offset).
367        if this_index == starts.len() - 1 {
368            if let Some(tz) = self.posix_tz() {
369                return tz.to_ambiguous_kind(dt);
370            }
371            // This case is unspecified according to RFC 8536. It means that
372            // the given datetime exceeds all transitions *and* there is no
373            // POSIX TZ string. So this can happen in V1 files for example.
374            // But those should hopefully be essentially non-existent nowadays
375            // (2024-03). In any case, we just fall through to using the last
376            // transition, which does seem likely to be wrong ~half the time
377            // in time zones with DST. But there really isn't much else we can
378            // do I think.
379        }
380        AmbiguousOffset::Unambiguous {
381            offset: Offset::from_seconds_unchecked(this_offset),
382        }
383    }
384
385    /// Returns the timestamp of the most recent time zone transition prior
386    /// to the timestamp given. If one doesn't exist, `None` is returned.
387    pub(crate) fn previous_transition(
388        &self,
389        ts: Timestamp,
390    ) -> Option<TimeZoneTransition> {
391        assert!(!self.timestamps().is_empty(), "transitions is non-empty");
392        let mut timestamp = ts.as_second();
393        if ts.subsec_nanosecond() != 0 {
394            timestamp = timestamp.saturating_add(1);
395        }
396        let search = self.timestamps().binary_search(&timestamp);
397        let index = match search {
398            Ok(i) | Err(i) => i.checked_sub(1)?,
399        };
400        let index = if index == 0 {
401            // The first transition is a dummy that we insert, so if we land on
402            // it here, treat it as if it doesn't exist.
403            return None;
404        } else if index == self.timestamps().len() - 1 {
405            if let Some(ref posix_tz) = self.posix_tz() {
406                // Since the POSIX TZ must be consistent with the last
407                // transition, it must be the case that tzif_last <=
408                // posix_prev_trans in all cases. So the transition according
409                // to the POSIX TZ is always correct here.
410                //
411                // What if this returns `None` though? I'm not sure in which
412                // cases that could matter, and I think it might be a violation
413                // of the TZif format if it does.
414                //
415                // It can return `None`! In the case of a time zone that
416                // has eliminated DST, it might have historical time zone
417                // transitions but a POSIX time zone without DST. (For example,
418                // `America/Sao_Paulo`.) And thus, this would return `None`.
419                // So if it does, we pretend as if the POSIX time zone doesn't
420                // exist.
421                if let Some(trans) = posix_tz.previous_transition(ts) {
422                    return Some(trans);
423                }
424            }
425            index
426        } else {
427            index
428        };
429        let timestamp = self.timestamps()[index];
430        let typ = self.local_time_type(index);
431        Some(TimeZoneTransition {
432            timestamp: Timestamp::constant(timestamp, 0),
433            offset: Offset::from_seconds_unchecked(typ.offset),
434            abbrev: self.designation(typ),
435            dst: Dst::from(typ.is_dst),
436        })
437    }
438
439    /// Returns the timestamp of the soonest time zone transition after the
440    /// timestamp given. If one doesn't exist, `None` is returned.
441    pub(crate) fn next_transition(
442        &self,
443        ts: Timestamp,
444    ) -> Option<TimeZoneTransition> {
445        assert!(!self.timestamps().is_empty(), "transitions is non-empty");
446        let timestamp = ts.as_second();
447        let search = self.timestamps().binary_search(&timestamp);
448        let index = match search {
449            Ok(i) => i.checked_add(1)?,
450            Err(i) => i,
451        };
452        let index = if index == 0 {
453            // The first transition is a dummy that we insert, so if we land on
454            // it here, treat it as if it doesn't exist.
455            return None;
456        } else if index >= self.timestamps().len() {
457            if let Some(posix_tz) = self.posix_tz() {
458                // Since the POSIX TZ must be consistent with the last
459                // transition, it must be the case that next.timestamp <=
460                // posix_next_tans in all cases. So the transition according to
461                // the POSIX TZ is always correct here.
462                //
463                // What if this returns `None` though? I'm not sure in which
464                // cases that could matter, and I think it might be a violation
465                // of the TZif format if it does.
466                //
467                // In the "previous" case above, this could return `None` even
468                // when there are historical time zone transitions in the case
469                // of a time zone eliminating DST (e.g., `America/Sao_Paulo`).
470                // But unlike the previous case, if we get `None` here, then
471                // that is the real answer because there are no other known
472                // future time zone transitions.
473                //
474                // 2025-05-05: OK, this could return `None` and this is fine.
475                // It happens for time zones that had DST but then stopped
476                // it at some point in the past. The POSIX time zone has no
477                // DST and thus returns `None`. That's fine. But there was a
478                // problem: we were using the POSIX time zone even when there
479                // was a historical time zone transition after the timestamp
480                // given. That was fixed by changing the condition when we get
481                // here: it can only happen when the timestamp given comes at
482                // or after all historical time zone transitions.
483                return posix_tz.next_transition(ts);
484            }
485            self.timestamps().len() - 1
486        } else {
487            index
488        };
489        let timestamp = self.timestamps()[index];
490        let typ = self.local_time_type(index);
491        Some(TimeZoneTransition {
492            timestamp: Timestamp::constant(timestamp, 0),
493            offset: Offset::from_seconds_unchecked(typ.offset),
494            abbrev: self.designation(typ),
495            dst: Dst::from(typ.is_dst),
496        })
497    }
498
499    fn designation(&self, typ: &shared::TzifLocalTimeType) -> &str {
500        // OK because we verify that the designation range on every local
501        // time type is a valid range into `self.designations`.
502        &self.designations()[typ.designation()]
503    }
504
505    fn local_time_type(
506        &self,
507        transition_index: usize,
508    ) -> &shared::TzifLocalTimeType {
509        // OK because we require that `type_index` always points to a valid
510        // local time type.
511        &self.types()[usize::from(self.infos()[transition_index].type_index)]
512    }
513
514    fn transition_kind(
515        &self,
516        transition_index: usize,
517    ) -> shared::TzifTransitionKind {
518        self.infos()[transition_index].kind
519    }
520
521    fn posix_tz(&self) -> Option<&PosixTimeZone<ABBREV>> {
522        self.posix_tz.as_ref()
523    }
524
525    fn designations(&self) -> &str {
526        self.inner.fixed.designations.as_ref()
527    }
528
529    fn types(&self) -> &[shared::TzifLocalTimeType] {
530        self.inner.types.as_ref()
531    }
532
533    fn timestamps(&self) -> &[i64] {
534        self.inner.transitions.timestamps.as_ref()
535    }
536
537    fn civil_starts(&self) -> &[shared::TzifDateTime] {
538        self.inner.transitions.civil_starts.as_ref()
539    }
540
541    fn civil_ends(&self) -> &[shared::TzifDateTime] {
542        self.inner.transitions.civil_ends.as_ref()
543    }
544
545    fn infos(&self) -> &[shared::TzifTransitionInfo] {
546        self.inner.transitions.infos.as_ref()
547    }
548}
549
// Marker impl only. The comparison itself lives in the `PartialEq` impl for
// this type, which looks at the name and the checksum.
impl<STR: AsRef<str>, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> Eq
    for Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
{
}
554
555impl<STR: AsRef<str>, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> PartialEq
556    for Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
557{
558    fn eq(&self, rhs: &Self) -> bool {
559        self.inner.fixed.name.as_ref().map(|n| n.as_ref())
560            == rhs.inner.fixed.name.as_ref().map(|n| n.as_ref())
561            && self.inner.fixed.checksum == rhs.inner.fixed.checksum
562    }
563}
564
565impl shared::TzifLocalTimeType {
566    fn designation(&self) -> Range<usize> {
567        usize::from(self.designation.0)..usize::from(self.designation.1)
568    }
569}
570
571impl core::fmt::Display for shared::TzifIndicator {
572    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
573        match *self {
574            shared::TzifIndicator::LocalWall => write!(f, "local/wall"),
575            shared::TzifIndicator::LocalStandard => write!(f, "local/std"),
576            shared::TzifIndicator::UTStandard => write!(f, "ut/std"),
577        }
578    }
579}
580
581/// Does a quick check that returns true if the data might be in TZif format.
582///
583/// It is possible that this returns true even if the given data is not in TZif
584/// format. However, it is impossible for this to return false when the given
585/// data is TZif. That is, a false positive is allowed but a false negative is
586/// not.
587#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn is_possibly_tzif(data: &[u8]) -> bool {
    // Only the four byte magic at the very start is inspected. Anything
    // shorter than that cannot match.
    matches!(data, [b'T', b'Z', b'i', b'f', ..])
}
591
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use alloc::{string::ToString, vec};

    #[cfg(not(miri))]
    use crate::tz::testdata::TZIF_TEST_FILES;

    use super::*;

    /// Renders TZif data in a human readable plain text format.
    ///
    /// Besides being handy for debugging (via `./scripts/jiff-debug tzif`),
    /// the rendering is what gets snapshot tested, so that the test output is
    /// at least *somewhat* comprehensible for humans. The alternative would
    /// be reading and understanding raw Unix timestamps.
    ///
    /// For this to be useful, everything in a `Tzif` value must show up in
    /// the output in some way.
    fn tzif_to_human_readable(tzif: &TzifOwned) -> String {
        use std::io::Write;

        /// Converts the TZif civil datetime to a Jiff `DateTime` (with no
        /// fractional seconds).
        fn datetime(dt: shared::TzifDateTime) -> DateTime {
            let (year, month, day) = (dt.year(), dt.month(), dt.day());
            let (hour, minute, second) = (dt.hour(), dt.minute(), dt.second());
            DateTime::constant(year, month, day, hour, minute, second, 0)
        }

        let mut out = tabwriter::TabWriter::new(vec![])
            .alignment(tabwriter::Alignment::Left);

        let name = tzif.name().unwrap_or("UNNAMED");
        writeln!(out, "TIME ZONE NAME").unwrap();
        writeln!(out, "  {name}").unwrap();

        let version = char::try_from(tzif.inner.fixed.version).unwrap();
        writeln!(out, "TIME ZONE VERSION").unwrap();
        writeln!(out, "  {version}").unwrap();

        writeln!(out, "LOCAL TIME TYPES").unwrap();
        for (i, typ) in tzif.inner.types.iter().enumerate() {
            let off = Offset::from_seconds_unchecked(typ.offset);
            let desig = tzif.designation(typ);
            let dst = if typ.is_dst { "dst" } else { "" };
            let ind = &typ.indicator;
            writeln!(
                out,
                "  {i:03}:\toffset={off}\t\
                   designation={desig}\t{dst}\tindicator={ind}",
            )
            .unwrap();
        }
        if !tzif.timestamps().is_empty() {
            writeln!(out, "TRANSITIONS").unwrap();
            for (i, &unix) in tzif.timestamps().iter().enumerate() {
                let timestamp = Timestamp::constant(unix, 0);
                let dt = Offset::UTC.to_datetime(timestamp);
                let typ = tzif.local_time_type(i);
                let wall = datetime(tzif.civil_starts()[i]).to_string();
                // The leading space on the gap label keeps it the same width
                // as the fold label.
                let ambiguous = match tzif.transition_kind(i) {
                    shared::TzifTransitionKind::Unambiguous => {
                        "unambiguous".to_string()
                    }
                    shared::TzifTransitionKind::Gap => alloc::format!(
                        " gap-until({})",
                        datetime(tzif.civil_ends()[i]),
                    ),
                    shared::TzifTransitionKind::Fold => alloc::format!(
                        "fold-until({})",
                        datetime(tzif.civil_ends()[i]),
                    ),
                };
                let ts = timestamp.as_second();
                let type_index = tzif.infos()[i].type_index;
                let off = Offset::from_seconds_unchecked(typ.offset);
                let desig = tzif.designation(typ);
                let dst = if typ.is_dst { "dst" } else { "" };

                writeln!(
                    out,
                    "  {i:04}:\t{dt:?}Z\tunix={ts}\twall={wall}\t\
                       {ambiguous}\t\
                       type={type_index}\t{off}\t\
                       {desig}\t{dst}",
                )
                .unwrap();
            }
        }
        if let Some(posix_tz) = tzif.posix_tz.as_ref() {
            writeln!(out, "POSIX TIME ZONE STRING").unwrap();
            writeln!(out, "  {posix_tz}").unwrap();
        }
        let rendered = out.into_inner().unwrap();
        String::from_utf8(rendered).unwrap()
    }

    /// DEBUG COMMAND
    ///
    /// Reads the environment variable `JIFF_DEBUG_TZIF_PATH` and treats its
    /// value as the path to a TZif file. The file is read, parsed as TZif and
    /// then dumped to stderr in a human readable plain text format. When the
    /// variable isn't set, this test does nothing.
    #[cfg(feature = "std")]
    #[test]
    fn debug_tzif() -> anyhow::Result<()> {
        use anyhow::Context;

        let _ = crate::logging::Logger::init();

        const ENV: &str = "JIFF_DEBUG_TZIF_PATH";
        let path = match std::env::var_os(ENV) {
            None => return Ok(()),
            Some(raw) => match raw.into_string() {
                Ok(path) => path,
                Err(_) => anyhow::bail!("{ENV} has invalid UTF-8"),
            },
        };
        let bytes = std::fs::read(&path)
            .with_context(|| alloc::format!("{path:?}"))?;
        let tzif = TzifOwned::parse(Some(path), &bytes)?;
        std::eprint!("{}", tzif_to_human_readable(&tzif));
        Ok(())
    }

    /// Snapshots the human readable rendering of every bundled TZif test
    /// file, as produced by each test file's `parse` method.
    #[cfg(not(miri))]
    #[test]
    fn tzif_parse_v2plus() {
        for tzif_test in TZIF_TEST_FILES {
            insta::assert_snapshot!(
                alloc::format!("{}_v2+", tzif_test.name),
                tzif_to_human_readable(&tzif_test.parse())
            );
        }
    }

    /// Snapshots the human readable rendering of every bundled TZif test
    /// file, as produced by each test file's `parse_v1` method.
    #[cfg(not(miri))]
    #[test]
    fn tzif_parse_v1() {
        for tzif_test in TZIF_TEST_FILES {
            insta::assert_snapshot!(
                alloc::format!("{}_v1", tzif_test.name),
                tzif_to_human_readable(&tzif_test.parse_v1())
            );
        }
    }

    /// Walks the /usr/share/zoneinfo directory (if it exists) and tries to
    /// parse every TZif formatted file found there.
    ///
    /// Nothing is done with the parsed data. This only ensures that parsing
    /// neither panics nor returns an error. That is, it checks that each
    /// file *can* be parsed, not that it is parsed correctly.
    #[cfg(not(miri))]
    #[cfg(feature = "tzdb-zoneinfo")]
    #[cfg(target_os = "linux")]
    #[test]
    fn zoneinfo() {
        const TZDIR: &str = "/usr/share/zoneinfo";

        // Errors from traversing the directory tree are dropped. They aren't
        // related to our parsing, so they indicate some other problem (like
        // the directory not existing).
        let entries =
            walkdir::WalkDir::new(TZDIR).into_iter().filter_map(Result::ok);
        for dent in entries {
            let path = dent.path();
            // This test can take some time in debug mode, so skip parsing
            // some of the less frequently used TZif files.
            let Some(name) = path.to_str() else { continue };
            if name.contains("right/") || name.contains("posix/") {
                continue;
            }
            // Unreadable files are skipped too: they are not this test's
            // problem.
            let Ok(bytes) = std::fs::read(path) else { continue };
            if !is_possibly_tzif(&bytes) {
                continue;
            }
            let relative = path.strip_prefix(TZDIR).unwrap_or_else(|_| {
                panic!("all paths in TZDIR have {TZDIR:?} prefix")
            });
            let tzname = relative
                .to_str()
                .expect("all paths to be valid UTF-8")
                .to_string();
            // At this point, `bytes` is very likely a TZif binary file, so a
            // parse failure fails the test.
            if let Err(err) = TzifOwned::parse(Some(tzname), &bytes) {
                panic!("failed to parse TZif file {:?}: {err}", path);
            }
        }
    }
}