jiff/tz/
concatenated.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
use alloc::{
    string::{String, ToString},
    vec::Vec,
};

use crate::{
    error::{err, Error, ErrorContext},
    tz::TimeZone,
    util::{array_str::ArrayStr, escape, utf8},
};

/// An abstraction for reading data from Android's concatenated TZif data file.
///
/// This abstraction is designed in a way that the data is reads from is
/// largely untrusted. This means that, no matter what sequence of bytes is
/// given, this should never panic (or else there is a bug). Moreover, there is
/// some guarding against disproportionate allocation. While big allocations
/// can still happen, they require a proportionally large data file. (Thus,
/// callers can guard against this by considering the size of the data.) What
/// this implementation prevents against is accidentally OOMing or panicking as
/// a result of naively doing `Vec::with_capacity(rdr.decode_integer())`.
///
/// This is also designed to work in alloc-only contexts mostly out of "good
/// sense." Technically we don't (currently) use this outside of `std`, since
/// it's only used for reading tzdb on Android from the file system. But we do
/// things this way in case we end up wanting to use it for something else.
/// If we needed this for no-alloc environments, then that's a much bigger
/// change, if only because it would require making the TZif parser no-alloc
/// compatible, and it's not quite clear what the best way to do that is. We
/// achieve the alloc-only API be introducing a trait that abstracts over a
/// `File` for random access to bytes.
#[derive(Debug)]
pub(crate) struct ConcatenatedTzif<R> {
    rdr: R,
    header: Header,
}

impl<R: Read> ConcatenatedTzif<R> {
    /// Open the concatenated TZif file using the reader given.
    ///
    /// This reads the header and will return an error if the header is
    /// invalid.
    pub(crate) fn open(rdr: R) -> Result<ConcatenatedTzif<R>, Error> {
        let header = Header::read(&rdr)?;
        Ok(ConcatenatedTzif { rdr, header })
    }

    /// Returns the version of this `tzdata` database.
    pub(crate) fn version(&self) -> ArrayStr<5> {
        self.header.version
    }

    /// Returns a `TimeZone` extracted from this concatenated TZif data.
    ///
    /// This is only successful if an index entry with the corresponding
    /// IANA time zone identifier could be found.
    ///
    /// Callers must provide two scratch buffers that are used for temporary
    /// allocation internally. Callers can create a new buffer for each call,
    /// but it's likely faster to reuse them if possible.
    ///
    /// If a `TimeZone` is returned, it is guaranteed to have a present IANA
    /// name (accessible via `TimeZone::iana_name`).
    pub(crate) fn get(
        &self,
        query: &str,
        scratch1: &mut Vec<u8>,
        scratch2: &mut Vec<u8>,
    ) -> Result<Option<TimeZone>, Error> {
        scratch1.clear();
        alloc(scratch1, self.header.index_len())?;
        self.rdr
            .read_exact_at(scratch1, self.header.index_offset)
            .context("failed to read index block")?;

        let mut index = &**scratch1;
        while !index.is_empty() {
            let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
            index = &index[IndexEntry::LEN..];
            let ordering = utf8::cmp_ignore_ascii_case_bytes(
                entry.name_bytes(),
                query.as_bytes(),
            );
            if ordering.is_ne() {
                continue;
            }

            // OK because `entry.name_bytes()` is equal to `query`,
            // ignoring ASCII case. The only way this can be true is is
            // `entry.name_bytes()` is itself valid UTF-8.
            let name = entry.name().unwrap();
            scratch2.clear();
            alloc(scratch2, entry.len())?;
            let start = self.header.data_offset.saturating_add(entry.start());
            self.rdr
                .read_exact_at(scratch2, start)
                .context("failed to read TZif data block")?;
            return TimeZone::tzif(name, scratch2).map(Some);
        }
        Ok(None)
    }

    /// Returns a list of all IANA time zone identifiers in this concatenated
    /// TZif data.
    ///
    /// Callers must provide a scratch buffer that is used for temporary
    /// allocation internally. Callers can create a new buffer for each call,
    /// but it's likely faster to reuse them if possible.
    pub(crate) fn available(
        &self,
        scratch: &mut Vec<u8>,
    ) -> Result<Vec<String>, Error> {
        scratch.clear();
        alloc(scratch, self.header.index_len())?;
        self.rdr
            .read_exact_at(scratch, self.header.index_offset)
            .context("failed to read index block")?;

        let names_len = self.header.index_len() / IndexEntry::LEN;
        // Why are we careless with this alloc? Well, its size is proportional
        // to the actual amount of data in the file. So the only way to get a
        // big alloc is to create a huge file. This seems... fine... I guess.
        // Where as the `alloc` above is done on the basis of an arbitrary
        // 32-bit integer.
        let mut names = Vec::with_capacity(names_len);
        let mut index = &**scratch;
        while !index.is_empty() {
            let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
            index = &index[IndexEntry::LEN..];
            names.push(entry.name()?.to_string());
        }
        Ok(names)
    }
}

/// The header of Android concatenated TZif data.
///
/// The header has the version and some offsets indicating the location of
/// the index entry (a list of IANA time zone identifiers and offsets into
/// the data block) and the actual TZif data.
#[derive(Debug)]
struct Header {
    version: ArrayStr<5>,
    index_offset: u64,
    data_offset: u64,
}

impl Header {
    /// Reads the header from Android's concatenated TZif concatenated data
    /// file.
    ///
    /// Basically, this gives us the version and some offsets for where to find
    /// data.
    fn read<R: Read + ?Sized>(rdr: &R) -> Result<Header, Error> {
        // 12 bytes plus 3 4-byte big endian integers.
        let mut buf = [0; 12 + 3 * 4];
        rdr.read_exact_at(&mut buf, 0)
            .context("failed to read concatenated TZif header")?;
        if &buf[..6] != b"tzdata" {
            return Err(err!(
                "expected first 6 bytes of concatenated TZif header \
                 to be `tzdata`, but found `{found}`",
                found = escape::Bytes(&buf[..6]),
            ));
        }
        if buf[11] != 0 {
            return Err(err!(
                "expected last byte of concatenated TZif header \
                 to be NUL, but found `{found}`",
                found = escape::Bytes(&buf[..12]),
            ));
        }

        let version = {
            let version = core::str::from_utf8(&buf[6..11]).map_err(|_| {
                err!(
                    "expected version in concatenated TZif header to \
                     be valid UTF-8, but found `{found}`",
                    found = escape::Bytes(&buf[6..11]),
                )
            })?;
            // OK because `version` is exactly 5 bytes, by construction.
            ArrayStr::new(version).unwrap()
        };
        // OK because the sub-slice is sized to exactly 4 bytes.
        let index_offset = u64::from(read_be32(&buf[12..16]));
        // OK because the sub-slice is sized to exactly 4 bytes.
        let data_offset = u64::from(read_be32(&buf[16..20]));
        if index_offset > data_offset {
            return Err(err!(
                "invalid index ({index_offset}) and data ({data_offset}) \
                 offsets, expected index offset to be less than or equal \
                 to data offset",
            ));
        }
        // we don't read 20..24 since we don't care about zonetab (yet)
        let header = Header { version, index_offset, data_offset };
        if header.index_len() % IndexEntry::LEN != 0 {
            return Err(err!(
                "length of index block is not a multiple {len}",
                len = IndexEntry::LEN,
            ));
        }
        Ok(header)
    }

    /// Returns the length of the index section of the concatenated tzdb.
    ///
    /// Beware of using this to create allocations. In theory, this should be
    /// trusted data, but the length can be any 32-bit integer. If it's used to
    /// create an allocation, it could potentially be up to 4GB.
    fn index_len(&self) -> usize {
        // OK because `Header` parsing returns an error if this overflows.
        let len = self.data_offset.checked_sub(self.index_offset).unwrap();
        // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
        // which at the time of writing, is not supported by Jiff. Instead,
        // a `usize::MAX` will trigger an allocation error.
        usize::try_from(len).unwrap_or(usize::MAX)
    }
}

/// A view into a single index entry in the index block of concatenated TZif
/// data.
///
/// If we had safe transmute, it would be much nicer to define this as
///
/// ```text
/// #[derive(Clone, Copy)]
/// #[repr(transparent, align(1))]
/// struct IndexEntry {
///     name: [u8; 40],
///     start: u32,
///     len: u32,
///     _raw_utc_offset: u32, // we don't use this here
/// }
/// ```
///
/// And probably implement a trait asserting that this is plain old data (or
/// derive it safely). And then we could cast `&[u8]` to `&[IndexEntry]`
/// safely and access the individual fields as is. We could do this today,
/// but not in safe code. And since this isn't performance critical, it's just
/// not worth flagging this code as potentially containing undefined behavior.
#[derive(Clone, Copy)]
struct IndexEntry<'a>(&'a [u8]);

impl<'a> IndexEntry<'a> {
    /// The length of an index entry. It's fixed size. 40 bytes for the IANA
    /// time zone identifier. 4 bytes for each of 3 big-endian integers. The
    /// first is the start of the corresponding TZif data within the data
    /// block. The second is the length of said TZif data. And the third is
    /// the "raw UTC offset" of the time zone. (I'm unclear on the semantics
    /// of this third, since some time zones have more than one because of
    /// DST. And of course, it can change over time. Since I don't know what
    /// Android uses this for, I'm not sure how I'm supposed to interpret it.)
    const LEN: usize = 40 + 3 * 4;

    /// Creates a new view into an entry in the concatenated TZif index.
    ///
    /// # Panics
    ///
    /// When `slice` does not have the expected length (`IndexEntry::LEN`).
    fn new(slice: &'a [u8]) -> IndexEntry<'a> {
        assert_eq!(slice.len(), IndexEntry::LEN, "invalid index entry length");
        IndexEntry(slice)
    }

    /// Like `name_bytes`, but as a `&str`.
    ///
    /// This returns an error if the name isn't valid UTF-8.
    fn name(&self) -> Result<&str, Error> {
        core::str::from_utf8(self.name_bytes()).map_err(|_| {
            err!(
                "IANA time zone identifier `{name}` is not valid UTF-8",
                name = escape::Bytes(self.name_bytes()),
            )
        })
    }

    /// Returns the IANA time zone identifier as a byte slice.
    ///
    /// In theory, an empty slice could be returned. But if that happens,
    /// then there is probably a bug in this code somewhere, the format
    /// changed or the source data is corrupt somehow.
    fn name_bytes(&self) -> &'a [u8] {
        let mut block = &self.0[..40];
        while block.last().copied() == Some(0) {
            block = &block[..block.len() - 1];
        }
        block
    }

    /// Returns the starting offset (relative to the beginning of the TZif
    /// data block) of the corresponding TZif data.
    fn start(&self) -> u64 {
        u64::from(read_be32(&self.0[40..44]))
    }

    /// Returns the length of the TZif data block.
    ///
    /// Beware of using this to create allocations. In theory, this should be
    /// trusted data, but the length can be any 32-bit integer. If it's used to
    /// create an allocation, it could potentially be up to 4GB.
    fn len(&self) -> usize {
        // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
        // which at the time of writing, is not supported by Jiff. Instead,
        // a `usize::MAX` will trigger an allocation error.
        usize::try_from(read_be32(&self.0[44..48])).unwrap_or(usize::MAX)
    }
}

impl<'a> core::fmt::Debug for IndexEntry<'a> {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("IndexEntry")
            .field("name", &escape::Bytes(self.name_bytes()))
            .field("start", &self.start())
            .field("len", &self.len())
            .finish()
    }
}

/// A crate-internal trait defining the source of concatenated TZif data.
///
/// Basically, this just provides a way to read a fixed amount of data at a
/// particular offset. This is obviously trivial to implement on `&[u8]` (and
/// indeed, we do so for testing), but we use it to abstract over platform
/// differences when reading from a `File`.
///
/// The intent is that on Unix, this will use `pread`, which avoids a file
/// seek followed by a `read` call.
pub(crate) trait Read {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error>;
}

impl<'a, R: Read + ?Sized> Read for &'a R {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        (**self).read_exact_at(buf, offset)
    }
}

/// Reads a 32-bit big endian encoded integer from `bytes`.
///
/// # Panics
///
/// If `bytes.len() != 4`.
fn read_be32(bytes: &[u8]) -> u32 {
    u32::from_be_bytes(bytes.try_into().expect("slice of length 4"))
}

#[cfg(test)]
impl Read for [u8] {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        let offset = usize::try_from(offset)
            .map_err(|_| err!("offset `{offset}` overflowed `usize`"))?;
        let Some(slice) = self.get(offset..) else {
            return Err(err!(
                "given offset `{offset}` is not valid \
                 (only {len} bytes are available)",
                len = self.len(),
            ));
        };
        if buf.len() > slice.len() {
            return Err(err!(
                "unexpected EOF, expected {len} bytes but only have {have}",
                len = buf.len(),
                have = slice.len()
            ));
        }
        buf.copy_from_slice(&slice[..buf.len()]);
        Ok(())
    }
}

#[cfg(all(feature = "std", unix))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::os::unix::fs::FileExt;
        FileExt::read_exact_at(self, buf, offset).map_err(Error::io)
    }
}

#[cfg(all(feature = "std", windows))]
impl Read for std::fs::File {
    fn read_exact_at(
        &self,
        mut buf: &mut [u8],
        mut offset: u64,
    ) -> Result<(), Error> {
        use std::{io, os::windows::fs::FileExt};

        while !buf.is_empty() {
            match self.seek_read(buf, offset) {
                Ok(0) => break,
                Ok(n) => {
                    buf = &mut buf[n..];
                    offset = u64::try_from(n)
                        .ok()
                        .and_then(|n| n.checked_add(offset))
                        .ok_or_else(|| {
                            err!("offset overflow when reading from `File`")
                        })?;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(Error::io(e)),
            }
        }
        if !buf.is_empty() {
            Err(Error::io(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "failed to fill whole buffer",
            )))
        } else {
            Ok(())
        }
    }
}

#[cfg(all(feature = "std", all(not(unix), not(windows))))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::io::{Read as _, Seek as _, SeekFrom};
        let mut file = self;
        file.seek(SeekFrom::Start(offset)).map_err(Error::io).with_context(
            || err!("failed to seek to offset {offset} in `File`"),
        )?;
        file.read_exact(buf).map_err(Error::io)
    }
}

/// Allocates `additional` extra bytes on the `Vec` given and set them to `0`.
///
/// This specifically will never do an "OOM panic" and will instead return an
/// error (courtesy of `Vec::try_reserve_exact`). It will also return an error
/// without even trying the allocation if it's deemed to be "too big."
///
/// This is used so that we are extra careful about creating allocations based
/// on integers parsed from concatenated TZif data. Generally speaking, the
/// data we parse should be "trusted" (since it's probably not writable by
/// anyone other than `root`), but who knows where this code will ultimately be
/// used. So we try pretty hard to avoid panicking (even for OOM).
///
/// To be clear, we probably could panic on the error path. The goal here
/// isn't to avoid OOM because you can't allocate 10 bytes---Jiff isn't robust
/// enough in that kind of environment by far. The goal is to avoid OOM for
/// exorbitantly large allocations through some kind of attack vector.
fn alloc(bytes: &mut Vec<u8>, additional: usize) -> Result<(), Error> {
    // At time of writing, the biggest TZif data file is a few KB. And the
    // index block is tens of KB. So impose a limit that is a couple of orders
    // of magnitude bigger, but still overall pretty small for... some systems.
    // Anyway, I welcome improvements to this heuristic!
    const LIMIT: usize = 10 * 1 << 20;

    if additional > LIMIT {
        return Err(err!(
            "attempted to allocate more than {LIMIT} bytes \
             while reading concatenated TZif data, which \
             exceeds a heuristic limit to prevent huge allocations \
             (please file a bug if this error is inappropriate)",
        ));
    }
    bytes.try_reserve_exact(additional).map_err(|_| {
        err!(
            "failed to allocation {additional} bytes \
             for reading concatenated TZif data"
        )
    })?;
    // This... can't actually happen right?
    let new_len = bytes
        .len()
        .checked_add(additional)
        .ok_or_else(|| err!("total allocation length overflowed `usize`"))?;
    bytes.resize(new_len, 0);
    Ok(())
}

#[cfg(test)]
mod tests {
    use crate::{
        civil::date,
        tz::{
            offset, testdata::ANDROID_CONCATENATED_TZIF, AmbiguousOffset,
            Offset,
        },
        Timestamp,
    };

    use super::*;

    fn unambiguous(offset_hours: i8) -> AmbiguousOffset {
        let offset = offset(offset_hours);
        o_unambiguous(offset)
    }

    fn gap(
        earlier_offset_hours: i8,
        later_offset_hours: i8,
    ) -> AmbiguousOffset {
        let earlier = offset(earlier_offset_hours);
        let later = offset(later_offset_hours);
        o_gap(earlier, later)
    }

    fn fold(
        earlier_offset_hours: i8,
        later_offset_hours: i8,
    ) -> AmbiguousOffset {
        let earlier = offset(earlier_offset_hours);
        let later = offset(later_offset_hours);
        o_fold(earlier, later)
    }

    fn o_unambiguous(offset: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Unambiguous { offset }
    }

    fn o_gap(earlier: Offset, later: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Gap { before: earlier, after: later }
    }

    fn o_fold(earlier: Offset, later: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Fold { before: earlier, after: later }
    }

    // Copied from src/tz/mod.rs.
    #[test]
    fn time_zone_tzif_to_ambiguous_timestamp() {
        let tests: &[(&str, &[_])] = &[
            (
                "America/New_York",
                &[
                    ((1969, 12, 31, 19, 0, 0, 0), unambiguous(-5)),
                    ((2024, 3, 10, 1, 59, 59, 999_999_999), unambiguous(-5)),
                    ((2024, 3, 10, 2, 0, 0, 0), gap(-5, -4)),
                    ((2024, 3, 10, 2, 59, 59, 999_999_999), gap(-5, -4)),
                    ((2024, 3, 10, 3, 0, 0, 0), unambiguous(-4)),
                    ((2024, 11, 3, 0, 59, 59, 999_999_999), unambiguous(-4)),
                    ((2024, 11, 3, 1, 0, 0, 0), fold(-4, -5)),
                    ((2024, 11, 3, 1, 59, 59, 999_999_999), fold(-4, -5)),
                    ((2024, 11, 3, 2, 0, 0, 0), unambiguous(-5)),
                ],
            ),
            (
                "Europe/Dublin",
                &[
                    ((1970, 1, 1, 0, 0, 0, 0), unambiguous(1)),
                    ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
                    ((2024, 3, 31, 1, 0, 0, 0), gap(0, 1)),
                    ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 1)),
                    ((2024, 3, 31, 2, 0, 0, 0), unambiguous(1)),
                    ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(1)),
                    ((2024, 10, 27, 1, 0, 0, 0), fold(1, 0)),
                    ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(1, 0)),
                    ((2024, 10, 27, 2, 0, 0, 0), unambiguous(0)),
                ],
            ),
            (
                "Australia/Tasmania",
                &[
                    ((1970, 1, 1, 11, 0, 0, 0), unambiguous(11)),
                    ((2024, 4, 7, 1, 59, 59, 999_999_999), unambiguous(11)),
                    ((2024, 4, 7, 2, 0, 0, 0), fold(11, 10)),
                    ((2024, 4, 7, 2, 59, 59, 999_999_999), fold(11, 10)),
                    ((2024, 4, 7, 3, 0, 0, 0), unambiguous(10)),
                    ((2024, 10, 6, 1, 59, 59, 999_999_999), unambiguous(10)),
                    ((2024, 10, 6, 2, 0, 0, 0), gap(10, 11)),
                    ((2024, 10, 6, 2, 59, 59, 999_999_999), gap(10, 11)),
                    ((2024, 10, 6, 3, 0, 0, 0), unambiguous(11)),
                ],
            ),
            (
                "Antarctica/Troll",
                &[
                    ((1970, 1, 1, 0, 0, 0, 0), unambiguous(0)),
                    // test the gap
                    ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
                    ((2024, 3, 31, 1, 0, 0, 0), gap(0, 2)),
                    ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 2)),
                    // still in the gap!
                    ((2024, 3, 31, 2, 0, 0, 0), gap(0, 2)),
                    ((2024, 3, 31, 2, 59, 59, 999_999_999), gap(0, 2)),
                    // finally out
                    ((2024, 3, 31, 3, 0, 0, 0), unambiguous(2)),
                    // test the fold
                    ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(2)),
                    ((2024, 10, 27, 1, 0, 0, 0), fold(2, 0)),
                    ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(2, 0)),
                    // still in the fold!
                    ((2024, 10, 27, 2, 0, 0, 0), fold(2, 0)),
                    ((2024, 10, 27, 2, 59, 59, 999_999_999), fold(2, 0)),
                    // finally out
                    ((2024, 10, 27, 3, 0, 0, 0), unambiguous(0)),
                ],
            ),
            (
                "America/St_Johns",
                &[
                    (
                        (1969, 12, 31, 20, 30, 0, 0),
                        o_unambiguous(-Offset::hms(3, 30, 0)),
                    ),
                    (
                        (2024, 3, 10, 1, 59, 59, 999_999_999),
                        o_unambiguous(-Offset::hms(3, 30, 0)),
                    ),
                    (
                        (2024, 3, 10, 2, 0, 0, 0),
                        o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
                    ),
                    (
                        (2024, 3, 10, 2, 59, 59, 999_999_999),
                        o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
                    ),
                    (
                        (2024, 3, 10, 3, 0, 0, 0),
                        o_unambiguous(-Offset::hms(2, 30, 0)),
                    ),
                    (
                        (2024, 11, 3, 0, 59, 59, 999_999_999),
                        o_unambiguous(-Offset::hms(2, 30, 0)),
                    ),
                    (
                        (2024, 11, 3, 1, 0, 0, 0),
                        o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
                    ),
                    (
                        (2024, 11, 3, 1, 59, 59, 999_999_999),
                        o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
                    ),
                    (
                        (2024, 11, 3, 2, 0, 0, 0),
                        o_unambiguous(-Offset::hms(3, 30, 0)),
                    ),
                ],
            ),
            // This time zone has an interesting transition where it jumps
            // backwards a full day at 1867-10-19T15:30:00.
            (
                "America/Sitka",
                &[
                    ((1969, 12, 31, 16, 0, 0, 0), unambiguous(-8)),
                    (
                        (-9999, 1, 2, 16, 58, 46, 0),
                        o_unambiguous(Offset::hms(14, 58, 47)),
                    ),
                    (
                        (1867, 10, 18, 15, 29, 59, 0),
                        o_unambiguous(Offset::hms(14, 58, 47)),
                    ),
                    (
                        (1867, 10, 18, 15, 30, 0, 0),
                        // A fold of 24 hours!!!
                        o_fold(
                            Offset::hms(14, 58, 47),
                            -Offset::hms(9, 1, 13),
                        ),
                    ),
                    (
                        (1867, 10, 19, 15, 29, 59, 999_999_999),
                        // Still in the fold...
                        o_fold(
                            Offset::hms(14, 58, 47),
                            -Offset::hms(9, 1, 13),
                        ),
                    ),
                    (
                        (1867, 10, 19, 15, 30, 0, 0),
                        // Finally out.
                        o_unambiguous(-Offset::hms(9, 1, 13)),
                    ),
                ],
            ),
            // As with to_datetime, we test every possible transition
            // point here since this time zone has a small number of them.
            (
                "Pacific/Honolulu",
                &[
                    (
                        (1896, 1, 13, 11, 59, 59, 0),
                        o_unambiguous(-Offset::hms(10, 31, 26)),
                    ),
                    (
                        (1896, 1, 13, 12, 0, 0, 0),
                        o_gap(
                            -Offset::hms(10, 31, 26),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1896, 1, 13, 12, 1, 25, 0),
                        o_gap(
                            -Offset::hms(10, 31, 26),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1896, 1, 13, 12, 1, 26, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1933, 4, 30, 1, 59, 59, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1933, 4, 30, 2, 0, 0, 0),
                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1933, 4, 30, 2, 59, 59, 0),
                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1933, 4, 30, 3, 0, 0, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1933, 5, 21, 10, 59, 59, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1933, 5, 21, 11, 0, 0, 0),
                        o_fold(
                            -Offset::hms(9, 30, 0),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1933, 5, 21, 11, 59, 59, 0),
                        o_fold(
                            -Offset::hms(9, 30, 0),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1933, 5, 21, 12, 0, 0, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1942, 2, 9, 1, 59, 59, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1942, 2, 9, 2, 0, 0, 0),
                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1942, 2, 9, 2, 59, 59, 0),
                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1942, 2, 9, 3, 0, 0, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1945, 8, 14, 13, 29, 59, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1945, 8, 14, 13, 30, 0, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1945, 8, 14, 13, 30, 1, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1945, 9, 30, 0, 59, 59, 0),
                        o_unambiguous(-Offset::hms(9, 30, 0)),
                    ),
                    (
                        (1945, 9, 30, 1, 0, 0, 0),
                        o_fold(
                            -Offset::hms(9, 30, 0),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1945, 9, 30, 1, 59, 59, 0),
                        o_fold(
                            -Offset::hms(9, 30, 0),
                            -Offset::hms(10, 30, 0),
                        ),
                    ),
                    (
                        (1945, 9, 30, 2, 0, 0, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1947, 6, 8, 1, 59, 59, 0),
                        o_unambiguous(-Offset::hms(10, 30, 0)),
                    ),
                    (
                        (1947, 6, 8, 2, 0, 0, 0),
                        o_gap(-Offset::hms(10, 30, 0), -offset(10)),
                    ),
                    (
                        (1947, 6, 8, 2, 29, 59, 0),
                        o_gap(-Offset::hms(10, 30, 0), -offset(10)),
                    ),
                    ((1947, 6, 8, 2, 30, 0, 0), unambiguous(-10)),
                ],
            ),
        ];
        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
        for &(tzname, datetimes_to_ambiguous) in tests {
            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
            for &(datetime, ambiguous_kind) in datetimes_to_ambiguous {
                let (year, month, day, hour, min, sec, nano) = datetime;
                let dt = date(year, month, day).at(hour, min, sec, nano);
                let got = tz.to_ambiguous_zoned(dt);
                assert_eq!(
                    got.offset(),
                    ambiguous_kind,
                    "\nTZ: {tzname}\ndatetime: \
                     {year:04}-{month:02}-{day:02}T\
                     {hour:02}:{min:02}:{sec:02}.{nano:09}",
                );
            }
        }
    }

    // Copied from src/tz/mod.rs.
    #[test]
    fn time_zone_tzif_to_datetime() {
        let o = |hours| offset(hours);
        let tests: &[(&str, &[_])] = &[
            (
                "America/New_York",
                &[
                    ((0, 0), o(-5), "EST", (1969, 12, 31, 19, 0, 0, 0)),
                    (
                        (1710052200, 0),
                        o(-5),
                        "EST",
                        (2024, 3, 10, 1, 30, 0, 0),
                    ),
                    (
                        (1710053999, 999_999_999),
                        o(-5),
                        "EST",
                        (2024, 3, 10, 1, 59, 59, 999_999_999),
                    ),
                    ((1710054000, 0), o(-4), "EDT", (2024, 3, 10, 3, 0, 0, 0)),
                    (
                        (1710055800, 0),
                        o(-4),
                        "EDT",
                        (2024, 3, 10, 3, 30, 0, 0),
                    ),
                    ((1730610000, 0), o(-4), "EDT", (2024, 11, 3, 1, 0, 0, 0)),
                    (
                        (1730611800, 0),
                        o(-4),
                        "EDT",
                        (2024, 11, 3, 1, 30, 0, 0),
                    ),
                    (
                        (1730613599, 999_999_999),
                        o(-4),
                        "EDT",
                        (2024, 11, 3, 1, 59, 59, 999_999_999),
                    ),
                    ((1730613600, 0), o(-5), "EST", (2024, 11, 3, 1, 0, 0, 0)),
                    (
                        (1730615400, 0),
                        o(-5),
                        "EST",
                        (2024, 11, 3, 1, 30, 0, 0),
                    ),
                ],
            ),
            (
                "Australia/Tasmania",
                &[
                    ((0, 0), o(11), "AEDT", (1970, 1, 1, 11, 0, 0, 0)),
                    (
                        (1728142200, 0),
                        o(10),
                        "AEST",
                        (2024, 10, 6, 1, 30, 0, 0),
                    ),
                    (
                        (1728143999, 999_999_999),
                        o(10),
                        "AEST",
                        (2024, 10, 6, 1, 59, 59, 999_999_999),
                    ),
                    (
                        (1728144000, 0),
                        o(11),
                        "AEDT",
                        (2024, 10, 6, 3, 0, 0, 0),
                    ),
                    (
                        (1728145800, 0),
                        o(11),
                        "AEDT",
                        (2024, 10, 6, 3, 30, 0, 0),
                    ),
                    ((1712415600, 0), o(11), "AEDT", (2024, 4, 7, 2, 0, 0, 0)),
                    (
                        (1712417400, 0),
                        o(11),
                        "AEDT",
                        (2024, 4, 7, 2, 30, 0, 0),
                    ),
                    (
                        (1712419199, 999_999_999),
                        o(11),
                        "AEDT",
                        (2024, 4, 7, 2, 59, 59, 999_999_999),
                    ),
                    ((1712419200, 0), o(10), "AEST", (2024, 4, 7, 2, 0, 0, 0)),
                    (
                        (1712421000, 0),
                        o(10),
                        "AEST",
                        (2024, 4, 7, 2, 30, 0, 0),
                    ),
                ],
            ),
            // Pacific/Honolulu is small eough that we just test every
            // possible instant before, at and after each transition.
            (
                "Pacific/Honolulu",
                &[
                    (
                        (-2334101315, 0),
                        -Offset::hms(10, 31, 26),
                        "LMT",
                        (1896, 1, 13, 11, 59, 59, 0),
                    ),
                    (
                        (-2334101314, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1896, 1, 13, 12, 1, 26, 0),
                    ),
                    (
                        (-2334101313, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1896, 1, 13, 12, 1, 27, 0),
                    ),
                    (
                        (-1157283001, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1933, 4, 30, 1, 59, 59, 0),
                    ),
                    (
                        (-1157283000, 0),
                        -Offset::hms(9, 30, 0),
                        "HDT",
                        (1933, 4, 30, 3, 0, 0, 0),
                    ),
                    (
                        (-1157282999, 0),
                        -Offset::hms(9, 30, 0),
                        "HDT",
                        (1933, 4, 30, 3, 0, 1, 0),
                    ),
                    (
                        (-1155436201, 0),
                        -Offset::hms(9, 30, 0),
                        "HDT",
                        (1933, 5, 21, 11, 59, 59, 0),
                    ),
                    (
                        (-1155436200, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1933, 5, 21, 11, 0, 0, 0),
                    ),
                    (
                        (-1155436199, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1933, 5, 21, 11, 0, 1, 0),
                    ),
                    (
                        (-880198201, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1942, 2, 9, 1, 59, 59, 0),
                    ),
                    (
                        (-880198200, 0),
                        -Offset::hms(9, 30, 0),
                        "HWT",
                        (1942, 2, 9, 3, 0, 0, 0),
                    ),
                    (
                        (-880198199, 0),
                        -Offset::hms(9, 30, 0),
                        "HWT",
                        (1942, 2, 9, 3, 0, 1, 0),
                    ),
                    (
                        (-769395601, 0),
                        -Offset::hms(9, 30, 0),
                        "HWT",
                        (1945, 8, 14, 13, 29, 59, 0),
                    ),
                    (
                        (-769395600, 0),
                        -Offset::hms(9, 30, 0),
                        "HPT",
                        (1945, 8, 14, 13, 30, 0, 0),
                    ),
                    (
                        (-769395599, 0),
                        -Offset::hms(9, 30, 0),
                        "HPT",
                        (1945, 8, 14, 13, 30, 1, 0),
                    ),
                    (
                        (-765376201, 0),
                        -Offset::hms(9, 30, 0),
                        "HPT",
                        (1945, 9, 30, 1, 59, 59, 0),
                    ),
                    (
                        (-765376200, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1945, 9, 30, 1, 0, 0, 0),
                    ),
                    (
                        (-765376199, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1945, 9, 30, 1, 0, 1, 0),
                    ),
                    (
                        (-712150201, 0),
                        -Offset::hms(10, 30, 0),
                        "HST",
                        (1947, 6, 8, 1, 59, 59, 0),
                    ),
                    // At this point, we hit the last transition and the POSIX
                    // TZ string takes over.
                    (
                        (-712150200, 0),
                        -Offset::hms(10, 0, 0),
                        "HST",
                        (1947, 6, 8, 2, 30, 0, 0),
                    ),
                    (
                        (-712150199, 0),
                        -Offset::hms(10, 0, 0),
                        "HST",
                        (1947, 6, 8, 2, 30, 1, 0),
                    ),
                ],
            ),
            // This time zone has an interesting transition where it jumps
            // backwards a full day at 1867-10-19T15:30:00.
            (
                "America/Sitka",
                &[
                    ((0, 0), o(-8), "PST", (1969, 12, 31, 16, 0, 0, 0)),
                    (
                        (-377705023201, 0),
                        Offset::hms(14, 58, 47),
                        "LMT",
                        (-9999, 1, 2, 16, 58, 46, 0),
                    ),
                    (
                        (-3225223728, 0),
                        Offset::hms(14, 58, 47),
                        "LMT",
                        (1867, 10, 19, 15, 29, 59, 0),
                    ),
                    // Notice the 24 hour time jump backwards a whole day!
                    (
                        (-3225223727, 0),
                        -Offset::hms(9, 1, 13),
                        "LMT",
                        (1867, 10, 18, 15, 30, 0, 0),
                    ),
                    (
                        (-3225223726, 0),
                        -Offset::hms(9, 1, 13),
                        "LMT",
                        (1867, 10, 18, 15, 30, 1, 0),
                    ),
                ],
            ),
        ];
        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
        for &(tzname, timestamps_to_datetimes) in tests {
            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
            for &((unix_sec, unix_nano), offset, abbrev, datetime) in
                timestamps_to_datetimes
            {
                let (year, month, day, hour, min, sec, nano) = datetime;
                let timestamp = Timestamp::new(unix_sec, unix_nano).unwrap();
                let info = tz.to_offset_info(timestamp);
                assert_eq!(
                    info.offset(),
                    offset,
                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
                );
                assert_eq!(
                    info.abbreviation(),
                    abbrev,
                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
                );
                assert_eq!(
                    info.offset().to_datetime(timestamp),
                    date(year, month, day).at(hour, min, sec, nano),
                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
                );
            }
        }
    }

    #[test]
    #[cfg(not(miri))]
    fn read_all_time_zones() {
        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
        let available = db.available(&mut alloc::vec![]).unwrap();
        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
        for tzname in available.iter() {
            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
            assert_eq!(tzname, tz.iana_name().unwrap());
        }
    }

    #[test]
    fn available_len() {
        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
        let available = db.available(&mut alloc::vec![]).unwrap();
        assert_eq!(596, available.len());
        for window in available.windows(2) {
            let (x1, x2) = (&window[0], &window[1]);
            assert!(x1 < x2, "{x1} is not less than {x2}");
        }
    }
}