jiff/shared/
tzif.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
use alloc::{string::String, vec};

use super::{
    util::{
        array_str::Abbreviation,
        error::{err, Error},
        escape::{Byte, Bytes},
        itime::{IOffset, ITimestamp},
    },
    PosixTimeZone, TzifDateTime, TzifFixed, TzifIndicator, TzifLocalTimeType,
    TzifOwned, TzifTransitionInfo, TzifTransitionKind, TzifTransitions,
    TzifTransitionsOwned,
};

// These are Jiff min and max timestamp (in seconds) values.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;

// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in true incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`
// (that is, `25*3600 + 59*60 + 59 = 93599` seconds in either
// direction). There's no real time zone with offsets even close
// to those boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;

// When fattening TZif data, this is the year to go up to.
//
// This year was chosen because it's what the "fat" TZif data generated
// by `zic` uses.
const FATTEN_UP_TO_YEAR: i16 = 2038;

// This is a "sanity" limit on the maximum number of transitions we'll
// add to TZif data when fattening them up.
//
// This is mostly just a defense-in-depth limit to avoid weird cases
// where a pathological POSIX time zone could be defined to create
// many transitions. It's not clear that this is actually possible,
// but I felt a little uneasy doing unbounded work that isn't linearly
// proportional to the input data. So, this limit is put into place for
// reasons of "good sense."
//
// For "normal" cases, there should be at most two transitions per
// year. So this limit permits 300/2=150 years of transition data.
// (Although we never add a transition whose year is `FATTEN_UP_TO_YEAR`
// or later. See above.)
const FATTEN_MAX_TRANSITIONS: usize = 300;

impl TzifOwned {
    /// Parses the given data as a TZif formatted file.
    ///
    /// The name given is attached to the `Tzif` value returned, but is
    /// otherwise not significant.
    ///
    /// If the given data is not recognized to be valid TZif, then an error is
    /// returned.
    ///
    /// In general, callers may assume that it is safe to pass arbitrary or
    /// even untrusted data to this function and count on it not panicking
    /// or using resources that aren't limited to a small constant factor of
    /// the size of the data itself. That is, callers can reliably limit the
    /// resources used by limiting the size of the data given to this parse
    /// function.
    ///
    /// The checksum stored on the returned value is computed over the prefix
    /// of `bytes` that was actually consumed by the parser. Any trailing
    /// bytes after the TZif data do not contribute to it.
    pub(crate) fn parse(
        name: Option<String>,
        bytes: &[u8],
    ) -> Result<TzifOwned, Error> {
        let (header32, rest) = Header::parse(4, bytes)
            .map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
        // A version byte of 0 means there is only a V1 (32-bit) data block.
        // Otherwise, a second header and a 64-bit data block follow it.
        let (mut tzif, rest) = if header32.version == 0 {
            TzifOwned::parse32(name, header32, rest)?
        } else {
            TzifOwned::parse64(name, header32, rest)?
        };
        tzif.fatten();
        // This should come after fattening, because fattening may add new
        // transitions and we want to add civil datetimes to those.
        tzif.add_civil_datetimes_to_transitions();
        tzif.verify_posix_time_zone_consistency()?;
        // Compute the checksum using the contents of the TZif data that we
        // parsed. Every parsing step hands back a suffix of its input, so
        // `rest` is a suffix of `bytes` and the number of bytes consumed is
        // just the difference of the two lengths.
        let tzif_raw_len = bytes
            .len()
            .checked_sub(rest.len())
            .expect("unparsed remainder is a suffix of the input");
        tzif.fixed.checksum = super::crc32::sum(&bytes[..tzif_raw_len]);

        // Shrink all of our allocs so we don't keep excess capacity around.
        tzif.fixed.designations.shrink_to_fit();
        tzif.types.shrink_to_fit();
        tzif.transitions.timestamps.shrink_to_fit();
        tzif.transitions.civil_starts.shrink_to_fit();
        tzif.transitions.civil_ends.shrink_to_fit();
        tzif.transitions.infos.shrink_to_fit();

        Ok(tzif)
    }

    /// Parses the single data block of a TZif file that has only a 32-bit
    /// header.
    ///
    /// `bytes` must begin immediately after the header. On success, the
    /// parsed value is returned along with whatever bytes follow the data
    /// block.
    fn parse32<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        let fixed = TzifFixed {
            name,
            version: header32.version,
            // The checksum is filled in by the caller once the full extent
            // of the TZif data is known.
            checksum: 0,
            designations: String::new(),
            posix_tz: None,
        };
        let transitions = TzifTransitions {
            timestamps: vec![],
            civil_starts: vec![],
            civil_ends: vec![],
            infos: vec![],
        };
        let mut tzif = TzifOwned { fixed, types: vec![], transitions };

        // Each step consumes its section from the front of the input and
        // returns what is left over for the next step.
        let header = &header32;
        let after_times = tzif.parse_transitions(header, bytes)?;
        let after_types = tzif.parse_transition_types(header, after_times)?;
        let after_locals = tzif.parse_local_time_types(header, after_types)?;
        let after_names =
            tzif.parse_time_zone_designations(header, after_locals)?;
        let after_leaps = tzif.parse_leap_seconds(header, after_names)?;
        let remaining = tzif.parse_indicators(header, after_leaps)?;
        Ok((tzif, remaining))
    }

    /// Parses a TZif file that carries a second, 64-bit data block.
    ///
    /// `bytes` must begin immediately after the first (32-bit) header. The
    /// V1 data block that header describes is skipped without being
    /// interpreted. Then the second header, its data block and the footer
    /// are parsed. On success, the parsed value is returned along with
    /// whatever bytes follow the footer.
    fn parse64<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        // Step over the V1 data block. Only its length matters here.
        let v1_len = header32.data_block_len()?;
        let (_, after_v1) = try_split_at("V1 TZif data block", bytes, v1_len)?;
        let (header64, body) = Header::parse(8, after_v1)
            .map_err(|e| err!("failed to parse 64-bit header: {e}"))?;

        let fixed = TzifFixed {
            name,
            version: header64.version,
            // The checksum is filled in by the caller once the full extent
            // of the TZif data is known.
            checksum: 0,
            designations: String::new(),
            posix_tz: None,
        };
        let transitions = TzifTransitions {
            timestamps: vec![],
            civil_starts: vec![],
            civil_ends: vec![],
            infos: vec![],
        };
        let mut tzif = TzifOwned { fixed, types: vec![], transitions };

        // Each step consumes its section from the front of the input and
        // returns what is left over for the next step.
        let header = &header64;
        let after_times = tzif.parse_transitions(header, body)?;
        let after_types = tzif.parse_transition_types(header, after_times)?;
        let after_locals = tzif.parse_local_time_types(header, after_types)?;
        let after_names =
            tzif.parse_time_zone_designations(header, after_locals)?;
        let after_leaps = tzif.parse_leap_seconds(header, after_names)?;
        let after_indicators = tzif.parse_indicators(header, after_leaps)?;
        let remaining = tzif.parse_footer(header, after_indicators)?;
        // We deliberately do not check that the TZif data is fully valid. It
        // may carry superfluous information, such as a non-zero local time
        // type that no transition ever references.
        Ok((tzif, remaining))
    }

    /// Parses the transition times block and records one transition per
    /// timestamp found.
    ///
    /// A dummy transition at `TIMESTAMP_MIN` with type index 0 is always
    /// recorded first. Timestamps outside of Jiff's supported range are
    /// clamped rather than rejected. Returns the bytes following the block.
    fn parse_transitions<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let block_len = header.transition_times_len()?;
        let (block, rest) =
            try_split_at("transition times data block", bytes, block_len)?;
        let mut chunks = block.chunks_exact(header.time_size);
        // RFC 8536 says: "If there are no transitions, local time for all
        // timestamps is specified by the TZ string in the footer if present
        // and nonempty; otherwise, it is specified by time type 0."
        //
        // RFC 8536 also says: "Local time for timestamps before the first
        // transition is specified by the first time type (time type
        // 0)."
        //
        // Pushing a dummy transition at the minimum timestamp covers both
        // statements: it is the only transition when the data has none, and
        // it applies to any time before the first real transition otherwise.
        self.transitions.add_with_type_index(TIMESTAMP_MIN, 0);
        for chunk in chunks.by_ref() {
            let mut timestamp = if header.is_32bit() {
                i64::from(from_be_bytes_i32(chunk))
            } else {
                from_be_bytes_i64(chunk)
            };
            if !(TIMESTAMP_MIN..=TIMESTAMP_MAX).contains(&timestamp) {
                // Jiff's supported range is somewhat arbitrary, so failing
                // outright on a timestamp outside of it would be too strict.
                // The range is good enough for all realistic purposes, so
                // an out-of-range value is clamped to the nearest bound.
                //
                // Clamping can't break the sort order, but it can produce a
                // transition that duplicates the dummy one inserted above.
                // That should be fine.
                let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
                // only-jiff-start
                warn!(
                    "found Unix timestamp {timestamp} that is outside \
                     Jiff's supported range, clamping to {clamped}",
                );
                // only-jiff-end
                timestamp = clamped;
            }
            self.transitions.add(timestamp);
        }
        assert!(chunks.remainder().is_empty());
        Ok(rest)
    }

    /// Parses the transition types block, assigning a local time type index
    /// to each transition previously recorded by `parse_transitions`.
    ///
    /// An error is returned if any index refers to a local time type beyond
    /// the count declared in the header. Returns the bytes following the
    /// block.
    fn parse_transition_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "transition types data block",
            bytes,
            header.transition_types_len()?,
        )?;
        // We skip the first transition because it is our minimum dummy
        // transition.
        for (transition_index, &type_index) in (1..).zip(bytes) {
            if usize::from(type_index) >= header.tzh_typecnt {
                // The trailing `\` keeps the message on a single line.
                // Without it, the literal would embed a newline and the
                // indentation of the following source line.
                return Err(err!(
                    "found transition type index {type_index}, \
                     but there are only {} local time types",
                    header.tzh_typecnt,
                ));
            }
            self.transitions.infos[transition_index].type_index = type_index;
        }
        Ok(rest)
    }

    /// Parses the local time type records, each of which is 6 bytes: a
    /// 4-byte big-endian offset, a DST flag byte and a designation index
    /// byte.
    ///
    /// An error is returned if an offset falls outside of
    /// `OFFSET_MIN..=OFFSET_MAX`. Returns the bytes following the block.
    fn parse_local_time_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let block_len = header.local_time_types_len()?;
        let (block, rest) =
            try_split_at("local time types data block", bytes, block_len)?;
        let mut records = block.chunks_exact(6);
        for record in records.by_ref() {
            let offset = from_be_bytes_i32(&record[..4]);
            if !(OFFSET_MIN..=OFFSET_MAX).contains(&offset) {
                return Err(err!(
                    "found local time type with out-of-bounds offset: {offset}"
                ));
            }
            let designation_start = record[5];
            self.types.push(TzifLocalTimeType {
                offset,
                is_dst: record[4] == 1,
                // Only the start of the designation is known at this point.
                // The end is resolved when the designations are parsed.
                designation: (designation_start, designation_start),
                indicator: TzifIndicator::LocalWall,
            });
        }
        assert!(records.remainder().is_empty());
        Ok(rest)
    }

    /// Parses the time zone designations block and resolves the designation
    /// range of every local time type parsed so far.
    ///
    /// The block must be valid UTF-8. Each local time type's designation
    /// start index must point into the block and be followed by a NUL
    /// terminator, and the resulting end index must fit into a `u8`.
    /// Returns the bytes following the block.
    fn parse_time_zone_designations<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let block_len = header.time_zone_designations_len()?;
        let (block, rest) = try_split_at(
            "time zone designations data block",
            bytes,
            block_len,
        )?;
        self.fixed.designations = match String::from_utf8(block.to_vec()) {
            Ok(designations) => designations,
            Err(_) => {
                return Err(err!(
                    "time zone designations are not valid UTF-8: {:?}",
                    Bytes(block),
                ))
            }
        };
        // The boundary conditions here are subtle, so every step is checked.
        let designations = &self.fixed.designations;
        for (i, typ) in self.types.iter_mut().enumerate() {
            let start = usize::from(typ.designation.0);
            let suffix = match designations.get(start..) {
                Some(suffix) => suffix,
                None => {
                    return Err(err!(
                        "local time type {i} has designation index of {start}, \
                         but cannot be more than {}",
                        designations.len(),
                    ))
                }
            };
            let len = match suffix.find('\x00') {
                Some(len) => len,
                None => {
                    return Err(err!(
                        "local time type {i} has designation index of {start}, \
                         but could not find NUL terminator after it in \
                         designations: {:?}",
                        designations,
                    ))
                }
            };
            let end = match start.checked_add(len) {
                Some(end) => end,
                None => {
                    return Err(err!(
                        "local time type {i} has designation index of {start}, \
                         but its length {len} is too big",
                    ))
                }
            };
            typ.designation.1 = u8::try_from(end).map_err(|_| {
                err!(
                    "local time type {i} has designation range of \
                     {start}..{end}, but end is too big",
                )
            })?;
        }
        Ok(rest)
    }

    /// Parses the leap second records in the TZif data.
    ///
    /// The records are only decoded and range checked. Nothing is stored,
    /// since Jiff effectively ignores leap seconds. Returns the bytes
    /// following the block.
    fn parse_leap_seconds<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let block_len = header.leap_second_len()?;
        let (block, rest) =
            try_split_at("leap seconds data block", bytes, block_len)?;
        // Each record is an occurrence time followed by a 4-byte correction.
        let record_len = header
            .time_size
            .checked_add(4)
            .expect("time_size plus 4 fits in usize");
        let mut records = block.chunks_exact(record_len);
        for record in records.by_ref() {
            let occur_bytes = &record[..header.time_size];
            let occur = if header.is_32bit() {
                i64::from(from_be_bytes_i32(occur_bytes))
            } else {
                from_be_bytes_i64(occur_bytes)
            };
            if !(TIMESTAMP_MIN..=TIMESTAMP_MAX).contains(&occur) {
                // only-jiff-start
                warn!(
                    "leap second occurrence {occur} is \
                     not in Jiff's supported range"
                )
                // only-jiff-end
            }
        }
        assert!(records.remainder().is_empty());
        Ok(rest)
    }

    /// Parses the standard/wall and UT/local indicator blocks and sets the
    /// indicator on each local time type accordingly.
    ///
    /// Either block may be empty. An error is returned for any indicator
    /// byte other than 0 or 1, and for a UT/local indicator of 1 that isn't
    /// paired with a standard/wall indicator of 1. Returns the bytes
    /// following both blocks.
    fn parse_indicators<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let std_wall_len = header.standard_wall_len()?;
        let (std_wall_bytes, rest) = try_split_at(
            "standard/wall indicators data block",
            bytes,
            std_wall_len,
        )?;
        let ut_local_len = header.ut_local_len()?;
        let (ut_local_bytes, rest) = try_split_at(
            "UT/local indicators data block",
            rest,
            ut_local_len,
        )?;
        match (std_wall_bytes.is_empty(), ut_local_bytes.is_empty()) {
            // With neither block present, every local time type already
            // carries the correct default indicator.
            (true, true) => {}
            // Only UT/local indicators are present. This is weird, but legal
            // so long as they're all 0. A value of 1 would require the
            // corresponding std/wall indicator to be 1 too, and there aren't
            // any of those.
            (true, false) => {
                for (i, &byte) in ut_local_bytes.iter().enumerate() {
                    if byte != 0 {
                        return Err(err!(
                            "found UT/local indicator '{byte}' for local time \
                             type {i}, but it must be 0 since all std/wall \
                             indicators are 0",
                        ));
                    }
                }
            }
            // Only std/wall indicators are present.
            (false, true) => {
                for (i, &byte) in std_wall_bytes.iter().enumerate() {
                    // Indexing is OK because Header guarantees that the
                    // number of indicators is 0 or equal to the number of
                    // types.
                    self.types[i].indicator = match byte {
                        0 => TzifIndicator::LocalWall,
                        1 => TzifIndicator::LocalStandard,
                        _ => {
                            return Err(err!(
                                "found invalid std/wall indicator '{byte}' for \
                                 local time type {i}, it must be 0 or 1",
                            ))
                        }
                    };
                }
            }
            // Both kinds of indicators are present, so pair them up.
            (false, false) => {
                assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
                let pairs = std_wall_bytes.iter().zip(ut_local_bytes);
                for (i, (&stdwall, &utlocal)) in pairs.enumerate() {
                    // Indexing is OK because Header guarantees that the
                    // number of indicators is 0 or equal to the number of
                    // types.
                    self.types[i].indicator = match (stdwall, utlocal) {
                        (0, 0) => TzifIndicator::LocalWall,
                        (1, 0) => TzifIndicator::LocalStandard,
                        (1, 1) => TzifIndicator::UTStandard,
                        (0, 1) => {
                            return Err(err!(
                                "found illegal ut-wall combination for \
                                 local time type {i}, only local-wall, \
                                 local-standard and ut-standard are allowed",
                            ))
                        }
                        _ => {
                            return Err(err!(
                                "found illegal std/wall or ut/local value for \
                                 local time type {i}, each must be 0 or 1",
                            ))
                        }
                    };
                }
            }
        }
        Ok(rest)
    }

    /// Parses the footer that follows the 64-bit data block in V2+ TZif
    /// data.
    ///
    /// The footer is a newline, a possibly empty POSIX TZ string and a
    /// terminating newline. When the TZ string is non-empty, it is parsed
    /// and stored in `self.fixed.posix_tz`. Returns the bytes following the
    /// terminating newline.
    ///
    /// An error is returned if the footer doesn't start with a newline, if
    /// no terminating newline is found within the first 1024 bytes after
    /// that, or if the TZ string fails to parse.
    fn parse_footer<'b>(
        &mut self,
        _header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let Some((&first, bytes)) = bytes.split_first() else {
            return Err(err!(
                "invalid V2+ TZif footer, expected \\n, \
                 but found unexpected end of data",
            ));
        };
        if first != b'\n' {
            return Err(err!(
                "invalid V2+ TZif footer, expected {:?}, but found {:?}",
                Byte(b'\n'),
                Byte(first),
            ));
        }
        // Only scan up to 1KB for the newline terminator in case we somehow
        // got passed a huge block of bytes.
        let toscan = &bytes[..bytes.len().min(1024)];
        let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
            return Err(err!(
                "invalid V2+ TZif footer, could not find {:?} \
                 terminator in: {:?}",
                Byte(b'\n'),
                Bytes(toscan),
            ));
        };
        let (tz_bytes, rest) = bytes.split_at(nlat);
        if !tz_bytes.is_empty() {
            // We could in theory limit TZ strings to their strict POSIX
            // definition here for TZif V2, but I don't think there is any
            // harm in allowing the extensions in V2 formatted TZif data. Note
            // that the GNU tooling allow it via the `TZ` environment variable
            // even though POSIX doesn't specify it. This all seems okay to me
            // because the V3+ extension is a strict superset of functionality.
            let posix_tz =
                PosixTimeZone::parse(tz_bytes).map_err(|e| err!("{e}"))?;
            self.fixed.posix_tz = Some(posix_tz);
        }
        // `rest` starts at the terminating newline found above, so it is
        // never empty and skipping one byte cannot go out of bounds.
        Ok(&rest[1..])
    }

    /// Validates that the POSIX TZ string we parsed (if one exists) is
    /// consistent with the last transition in this time zone. This is
    /// required by RFC 8536.
    ///
    /// RFC 8536 says, "If the string is nonempty and one or more
    /// transitions appear in the version 2+ data, the string MUST be
    /// consistent with the last version 2+ transition."
    ///
    /// Consistency here means that the offset, the DST flag and the
    /// designation that the POSIX TZ string yields at the timestamp of the
    /// last transition all match that transition's local time type. An
    /// error is returned for the first of those that doesn't match.
    fn verify_posix_time_zone_consistency(&self) -> Result<(), Error> {
        // We need to be a little careful, since we always have at least one
        // transition (accounting for the dummy `TIMESTAMP_MIN` transition).
        // So if we only have 1 transition and a POSIX TZ string, then we
        // should not validate it since it's equivalent to the case of 0
        // transitions and a POSIX TZ string.
        if self.transitions.timestamps.len() <= 1 {
            return Ok(());
        }
        let Some(ref tz) = self.fixed.posix_tz else {
            return Ok(());
        };
        let last = self
            .transitions
            .timestamps
            .last()
            .expect("last transition timestamp");
        let type_index = self
            .transitions
            .infos
            .last()
            .expect("last transition info")
            .type_index;
        let typ = &self.types[usize::from(type_index)];
        let (ioff, abbrev, is_dst) =
            tz.to_offset_info(ITimestamp::from_second(*last));
        // This compares the total offset of the local time type, which is
        // why the message doesn't call it a "DST offset".
        if ioff.second != typ.offset {
            return Err(err!(
                "expected last transition to have UTC offset \
                 of {expected_offset}, but got {got_offset} \
                 according to POSIX TZ string {tz}",
                expected_offset = typ.offset,
                got_offset = ioff.second,
                tz = tz,
            ));
        }
        if is_dst != typ.is_dst {
            return Err(err!(
                "expected last transition to have is_dst={expected_dst}, \
                 but got is_dst={got_dst} according to POSIX TZ \
                 string {tz}",
                expected_dst = typ.is_dst,
                got_dst = is_dst,
                tz = tz,
            ));
        }
        let expected_abbrev = self.designation(typ);
        if abbrev != expected_abbrev {
            return Err(err!(
                "expected last transition to have \
                 designation={expected_abbrev}, \
                 but got designation={got_abbrev} according to POSIX TZ \
                 string {tz}",
                expected_abbrev = expected_abbrev,
                got_abbrev = abbrev,
                tz = tz,
            ));
        }
        Ok(())
    }

    /// Fill in the civil datetime columns and the kind of every transition.
    ///
    /// This isn't strictly necessary, but it speeds up time zone lookups when
    /// the input is a civil datetime. Comparisons can then be done directly
    /// on the civil datetime given, instead of converting it to a timestamp
    /// first. (Even without this, at least one additional offset timestamp
    /// would likely still be needed, because TZ lookups for a civil datetime
    /// are done in local time while the timestamps in TZif data are all in
    /// UTC.)
    fn add_civil_datetimes_to_transitions(&mut self) {
        /// Converts a Unix timestamp at the given offset (both in seconds)
        /// to a civil datetime.
        fn to_datetime(timestamp: i64, offset: i32) -> TzifDateTime {
            use crate::shared::util::itime::{IOffset, ITimestamp};
            let dt = ITimestamp { second: timestamp, nanosecond: 0 }
                .to_datetime(IOffset { second: offset });
            TzifDateTime::new(
                dt.date.year,
                dt.date.month,
                dt.date.day,
                dt.time.hour,
                dt.time.minute,
                dt.time.second,
            )
        }

        let types = &self.types;
        let trans = &mut self.transitions;
        for i in 0..trans.timestamps.len() {
            let timestamp = trans.timestamps[i];
            let offset = types[usize::from(trans.infos[i].type_index)].offset;
            // The first transition has no predecessor, so it is compared
            // against itself and is therefore always unambiguous.
            let prev_index = i.saturating_sub(1);
            let prev_offset =
                types[usize::from(trans.infos[prev_index].type_index)].offset;

            let (kind, start, end) = match prev_offset.cmp(&offset) {
                // Equivalent offsets means there can never be any ambiguity.
                // Only the start is recorded in this case.
                core::cmp::Ordering::Equal => (
                    TzifTransitionKind::Unambiguous,
                    to_datetime(timestamp, prev_offset),
                    None,
                ),
                // A smaller previous offset means some non-zero amount of
                // time is "skipped" when moving to this transition. That's
                // a gap. It starts at the earliest time, which comes from
                // the smaller of the two offsets.
                core::cmp::Ordering::Less => (
                    TzifTransitionKind::Gap,
                    to_datetime(timestamp, prev_offset),
                    Some(to_datetime(timestamp, offset)),
                ),
                // A greater previous offset means some non-zero amount of
                // time is replayed on a wall clock in this time zone. That's
                // a fold. It starts at the earliest time, which comes from
                // the smaller of the two offsets.
                core::cmp::Ordering::Greater => (
                    TzifTransitionKind::Fold,
                    to_datetime(timestamp, offset),
                    Some(to_datetime(timestamp, prev_offset)),
                ),
            };
            trans.infos[i].kind = kind;
            trans.civil_starts[i] = start;
            if let Some(end) = end {
                trans.civil_ends[i] = end;
            }
        }
    }

    /// Extend this TZif data with transitions derived from its POSIX TZ
    /// string.
    ///
    /// These additional transitions often make time zone lookups faster, and
    /// they smooth out the performance difference between using "slim" and
    /// "fat" tzdbs.
    ///
    /// Nothing happens unless the `tz-fat` feature is enabled and a POSIX TZ
    /// string is present.
    fn fatten(&mut self) {
        // Note that this is a crate feature for *both* `jiff` and
        // `jiff-static`.
        if !cfg!(feature = "tz-fat") {
            return;
        }
        let posix_tz = match self.fixed.posix_tz.clone() {
            Some(posix_tz) => posix_tz,
            None => return,
        };
        let last =
            *self.transitions.timestamps.last().expect("last transition");
        let mut prev = ITimestamp::from_second(last);
        // Keep adding transitions, each one starting from the previous one
        // added, until there are no more to add or the defense-in-depth
        // limit on attempts is exhausted.
        for _ in 0..=FATTEN_MAX_TRANSITIONS {
            match self.add_transition(&posix_tz, prev) {
                Some(next) => prev = next,
                None => return,
            }
        }
        // only-jiff-start
        warn!(
            "fattening TZif data for `{name:?}` somehow generated \
             more than {max} transitions, so giving up to avoid \
             doing too much work",
            name = self.fixed.name,
            max = FATTEN_MAX_TRANSITIONS,
        );
        // only-jiff-end
    }

    /// Adds the next transition that the given POSIX time zone reports
    /// after `prev` to this TZif data, and returns its timestamp.
    ///
    /// `None` is returned, and nothing is added, when the POSIX time zone
    /// has no further transition, when the transition's year (in UTC) is
    /// `FATTEN_UP_TO_YEAR` or later, or when no local time type could be
    /// found or created for it.
    fn add_transition(
        &mut self,
        posix_tz: &PosixTimeZone<Abbreviation>,
        prev: ITimestamp,
    ) -> Option<ITimestamp> {
        let (its, ioff, abbrev, is_dst) = posix_tz.next_transition(prev)?;
        let year = its.to_datetime(IOffset::UTC).date.year;
        if year >= FATTEN_UP_TO_YEAR {
            return None;
        }
        match self.find_or_create_local_time_type(ioff, abbrev, is_dst) {
            None => None,
            Some(type_index) => {
                self.transitions.add_with_type_index(its.second, type_index);
                Some(its)
            }
        }
    }

    /// Returns the index of the local time type with the given offset,
    /// abbreviation and DST flag.
    ///
    /// When no existing local time type matches, a new one is appended and
    /// its index is returned.
    ///
    /// `None` is returned when the index doesn't fit into a `u8`, or when a
    /// designation for the abbreviation could neither be found nor created.
    fn find_or_create_local_time_type(
        &mut self,
        offset: IOffset,
        abbrev: &str,
        is_dst: bool,
    ) -> Option<u8> {
        let existing = self.types.iter().position(|typ| {
            offset.second == typ.offset
                && abbrev == self.designation(typ)
                && is_dst == typ.is_dst
        });
        if let Some(index) = existing {
            return u8::try_from(index).ok();
        }
        let index = u8::try_from(self.types.len()).ok()?;
        let designation = self.find_or_create_designation(abbrev)?;
        let new_type = TzifLocalTimeType {
            offset: offset.second,
            is_dst,
            designation,
            // Not really clear if this is correct, but Jiff
            // ignores this anyway, so ¯\_(ツ)_/¯.
            indicator: TzifIndicator::LocalWall,
        };
        self.types.push(new_type);
        Some(index)
    }

    /// Look for a designation (i.e., time zone abbreviation) matching the data
    /// given, and return its range into `self.fixed.designations`.
    ///
    /// Only NUL-terminated entries that start at the beginning of the table
    /// or immediately after a NUL are considered when searching.
    ///
    /// If one could not be found, then one is created by appending it (with
    /// a NUL terminator) to the end of the table, and its range is returned.
    ///
    /// If one could not be found and one could not be created (e.g., the range
    /// would overflow `u8`), then `None` is returned. In that case, the table
    /// is left unmodified.
    fn find_or_create_designation(
        &mut self,
        needle: &str,
    ) -> Option<(u8, u8)> {
        let designations = &self.fixed.designations;
        let mut start = 0;
        while let Some(offset) = designations[start..].find('\0') {
            let end = start + offset;
            if needle == &designations[start..end] {
                return Some((start.try_into().ok()?, end.try_into().ok()?));
            }
            start = end + 1;
        }
        // The new entry always lands at the end of the table. This is not
        // necessarily where the scan above stopped, since the table may end
        // with bytes that have no NUL terminator after them. Using the scan
        // position would then yield a range covering the wrong text.
        let start = self.fixed.designations.len();
        let end = start.checked_add(needle.len())?;
        // Make sure the range is representable before growing the table, so
        // that a failure doesn't leave an unreferenced entry behind.
        let range = (u8::try_from(start).ok()?, u8::try_from(end).ok()?);
        self.fixed.designations.push_str(needle);
        self.fixed.designations.push('\0');
        Some(range)
    }

    /// Returns the designation (i.e., time zone abbreviation) of the given
    /// local time type as a slice of `self.fixed.designations`.
    fn designation(&self, typ: &TzifLocalTimeType) -> &str {
        let (start, end) = typ.designation;
        // Slicing is OK because the designation range on every local time
        // type is verified to be a valid range into the designations.
        &self.fixed.designations[usize::from(start)..usize::from(end)]
    }
}

impl TzifTransitionsOwned {
    /// Append one transition occurring at the given timestamp.
    ///
    /// The remaining columns (civil starts, civil ends and infos) receive
    /// placeholder values, with the type index set to `0`. Callers are
    /// expected to overwrite them later.
    fn add(&mut self, timestamp: i64) {
        let type_index = 0;
        self.add_with_type_index(timestamp, type_index);
    }

    /// Like `TzifTransitionsOwned::add`, but lets the caller provide a type
    /// index if it is known.
    ///
    /// Every column grows by exactly one entry: the civil start and end are
    /// set to `TzifDateTime::ZERO` and the transition kind is set to
    /// `TzifTransitionKind::Unambiguous`.
    fn add_with_type_index(&mut self, timestamp: i64, type_index: u8) {
        let info = TzifTransitionInfo {
            kind: TzifTransitionKind::Unambiguous,
            type_index,
        };
        self.infos.push(info);
        self.civil_ends.push(TzifDateTime::ZERO);
        self.civil_starts.push(TzifDateTime::ZERO);
        self.timestamps.push(timestamp);
    }
}

/// The header for a TZif formatted file.
///
/// V2+ TZif formatted files have two headers: one for the V1 data block, and
/// then a second following the V1 data block that describes another data
/// block which uses 64-bit timestamps. The two headers both have the same
/// format and both use 32-bit big-endian encoded integers.
///
/// The `tzh_*` fields are the six count fields of the header, stored here as
/// `usize` after being decoded by `Header::parse`.
#[derive(Debug)]
struct Header {
    /// The size, in bytes, of the timestamps encoded in the data block.
    ///
    /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
    /// block in V2+). `Header::parse` asserts this.
    time_size: usize,
    /// The file format version.
    ///
    /// Note that this is either a NUL byte (for version 1), or an ASCII byte
    /// corresponding to the version number. That is, `0x32` for `2`, `0x33`
    /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
    /// have been recently generated does not mean it uses the latest format
    /// version. It seems like newer versions are only compiled by `zic` when
    /// they are needed. For example, `America/New_York` on my system (as of
    /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
    /// `0x33`.
    ///
    /// `Header::parse` stores this byte as-is, without validating it.
    version: u8,
    /// Number of UT/local indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisutcnt: usize,
    /// The number of standard/wall indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisstdcnt: usize,
    /// The number of leap seconds for which data entries are stored in the
    /// file.
    ///
    /// Each entry occupies `time_size + 4` bytes in the data block.
    tzh_leapcnt: usize,
    /// The number of transition times for which data entries are stored in
    /// the file.
    ///
    /// Each transition contributes `time_size` bytes to the transition times
    /// section and one byte to the transition types section of the data
    /// block.
    tzh_timecnt: usize,
    /// The number of local time types for which data entries are stored in the
    /// file.
    ///
    /// Each entry occupies 6 bytes in the data block.
    ///
    /// This is checked to be at least `1`.
    tzh_typecnt: usize,
    /// The number of bytes of time zone abbreviation strings stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_charcnt: usize,
}

impl Header {
    /// Parse the header record from the given bytes.
    ///
    /// Upon success, return the header and all bytes after the header.
    ///
    /// The given `time_size` must be 4 or 8, corresponding to either the
    /// V1 header block or the V2+ header block, respectively.
    ///
    /// # Panics
    ///
    /// When `time_size` is neither 4 nor 8.
    ///
    /// # Errors
    ///
    /// This returns an error when fewer than 44 bytes are given, when the
    /// bytes don't start with the `TZif` magic, when `tzh_ttisutcnt` or
    /// `tzh_ttisstdcnt` is neither zero nor equal to `tzh_typecnt`, or when
    /// `tzh_typecnt` or `tzh_charcnt` is zero.
    fn parse(
        time_size: usize,
        bytes: &[u8],
    ) -> Result<(Header, &[u8]), Error> {
        assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
        // The header is 44 bytes: 4 (magic) + 1 (version) + 15 (reserved)
        // + 6 * 4 (counts). Checking this up front means none of the
        // `split_at` calls below can panic.
        if bytes.len() < 44 {
            return Err(err!("invalid header: too short"));
        }
        let (magic, rest) = bytes.split_at(4);
        if magic != b"TZif" {
            return Err(err!("invalid header: magic bytes mismatch"));
        }
        let (version, rest) = rest.split_at(1);
        let (_reserved, rest) = rest.split_at(15);

        let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
        let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
        let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
        let (tzh_timecnt_bytes, rest) = rest.split_at(4);
        let (tzh_typecnt_bytes, rest) = rest.split_at(4);
        let (tzh_charcnt_bytes, rest) = rest.split_at(4);

        let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
        let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
        let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
        let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
        let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
        let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;

        // Both indicator arrays are optional, but when present they must
        // have exactly one entry per local time type.
        if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_typecnt < 1 {
            return Err(err!(
                "expected tzh_typecnt={tzh_typecnt} to be at least 1",
            ));
        }
        if tzh_charcnt < 1 {
            return Err(err!(
                "expected tzh_charcnt={tzh_charcnt} to be at least 1",
            ));
        }

        let header = Header {
            time_size,
            version: version[0],
            tzh_ttisutcnt,
            tzh_ttisstdcnt,
            tzh_leapcnt,
            tzh_timecnt,
            tzh_typecnt,
            tzh_charcnt,
        };
        Ok((header, rest))
    }

    /// Returns true if this header is for a 32-bit data block.
    ///
    /// When false, it is guaranteed that this header is for a 64-bit data
    /// block.
    fn is_32bit(&self) -> bool {
        self.time_size == 4
    }

    /// Returns the size of the data block, in bytes, for this header.
    ///
    /// This returns an error if the arithmetic required to compute the
    /// length would overflow.
    ///
    /// This is useful for, e.g., skipping over the 32-bit V1 data block in
    /// V2+ TZif formatted files.
    fn data_block_len(&self) -> Result<usize, Error> {
        // The sections are listed in the order they appear in the data
        // block. Each length is computed (and its error reported) before
        // any summation happens.
        let section_lens = [
            self.transition_times_len()?,
            self.transition_types_len()?,
            self.local_time_types_len()?,
            self.time_zone_designations_len()?,
            self.leap_second_len()?,
            self.standard_wall_len()?,
            self.ut_local_len()?,
        ];
        section_lens
            .iter()
            .try_fold(0usize, |sum, &len| sum.checked_add(len))
            .ok_or_else(|| {
                // `self.version` is the raw header byte: NUL for version 1
                // and an ASCII digit otherwise. Formatting it as an integer
                // would report, e.g., `V50` for a version 2 file, so turn
                // it into the digit a human expects.
                let version = match self.version {
                    0 => '1',
                    byte => char::from(byte),
                };
                err!(
                    "length of data block in V{} tzfile is too big",
                    version
                )
            })
    }

    /// Returns the length, in bytes, of the transition times section.
    ///
    /// This is `tzh_timecnt` timestamps of `time_size` bytes each. An error
    /// is returned if the multiplication overflows.
    fn transition_times_len(&self) -> Result<usize, Error> {
        self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
            err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
        })
    }

    /// Returns the length, in bytes, of the transition types section.
    ///
    /// This is one byte per transition, and thus never fails.
    fn transition_types_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_timecnt)
    }

    /// Returns the length, in bytes, of the local time types section.
    ///
    /// This is `tzh_typecnt` records of 6 bytes each. An error is returned
    /// if the multiplication overflows.
    fn local_time_types_len(&self) -> Result<usize, Error> {
        self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
            err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
        })
    }

    /// Returns the length, in bytes, of the time zone designations section.
    ///
    /// This is `tzh_charcnt` as-is, and thus never fails.
    fn time_zone_designations_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_charcnt)
    }

    /// Returns the length, in bytes, of the leap second section.
    ///
    /// This is `tzh_leapcnt` records of `time_size + 4` bytes each. An error
    /// is returned if the multiplication overflows.
    fn leap_second_len(&self) -> Result<usize, Error> {
        let record_len = self
            .time_size
            .checked_add(4)
            .expect("4-or-8 plus 4 always fits in usize");
        self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
            err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
        })
    }

    /// Returns the length, in bytes, of the standard/wall indicators
    /// section.
    ///
    /// This is one byte per indicator, and thus never fails.
    fn standard_wall_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisstdcnt)
    }

    /// Returns the length, in bytes, of the UT/local indicators section.
    ///
    /// This is one byte per indicator, and thus never fails.
    fn ut_local_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisutcnt)
    }
}

/// Divides `bytes` into the first `at` bytes and everything after them.
///
/// # Errors
///
/// When `at` exceeds `bytes.len()`, no split is possible and an error is
/// returned instead. The error message includes the `what` string given,
/// which is meant to describe the thing being split.
fn try_split_at<'b>(
    what: &'static str,
    bytes: &'b [u8],
    at: usize,
) -> Result<(&'b [u8], &'b [u8]), Error> {
    if at <= bytes.len() {
        return Ok(bytes.split_at(at));
    }
    Err(err!(
        "expected at least {at} bytes for {what}, \
         but found only {} bytes",
        bytes.len(),
    ))
}

/// Interprets the given slice as an unsigned 32-bit big endian integer,
/// attempts to convert it to a `usize` and returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
///
/// # Errors
///
/// This errors if the `u32` parsed from the given bytes cannot fit in a
/// `usize`.
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
    let n = from_be_bytes_u32(bytes);
    usize::try_from(n).map_err(|_| {
        // The parenthetical in this message is closed so that it reads
        // correctly when embedded in the caller's own error message.
        err!(
            "failed to parse integer {n} (too big, max allowed is {})",
            usize::MAX
        )
    })
}

/// Decodes an unsigned 32-bit integer from exactly four big endian bytes.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
    let quad = <[u8; 4]>::try_from(bytes).unwrap();
    u32::from_be_bytes(quad)
}

/// Decodes a signed 32-bit integer from exactly four big endian bytes.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
    let quad = <[u8; 4]>::try_from(bytes).unwrap();
    i32::from_be_bytes(quad)
}

/// Decodes a signed 64-bit integer from exactly eight big endian bytes.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
    let octet = <[u8; 8]>::try_from(bytes).unwrap();
    i64::from_be_bytes(octet)
}