jiff/tz/
concatenated.rs

1use alloc::{
2    string::{String, ToString},
3    vec::Vec,
4};
5
6use crate::{
7    error::{err, Error, ErrorContext},
8    tz::TimeZone,
9    util::{array_str::ArrayStr, escape, utf8},
10};
11
/// An abstraction for reading data from Android's concatenated TZif data file.
///
/// This abstraction is designed in a way that the data it reads from is
/// largely untrusted. This means that, no matter what sequence of bytes is
/// given, this should never panic (or else there is a bug). Moreover, there is
/// some guarding against disproportionate allocation. While big allocations
/// can still happen, they require a proportionally large data file. (Thus,
/// callers can guard against this by considering the size of the data.) What
/// this implementation guards against is accidentally OOMing or panicking as
/// a result of naively doing `Vec::with_capacity(rdr.decode_integer())`.
///
/// This is also designed to work in alloc-only contexts mostly out of "good
/// sense." Technically we don't (currently) use this outside of `std`, since
/// it's only used for reading tzdb on Android from the file system. But we do
/// things this way in case we end up wanting to use it for something else.
/// If we needed this for no-alloc environments, then that's a much bigger
/// change, if only because it would require making the TZif parser no-alloc
/// compatible, and it's not quite clear what the best way to do that is. We
/// achieve the alloc-only API by introducing a trait that abstracts over a
/// `File` for random access to bytes.
#[derive(Debug)]
pub(crate) struct ConcatenatedTzif<R> {
    /// The source that the concatenated TZif data is read from.
    rdr: R,
    /// The header that was parsed from `rdr` when this value was opened.
    header: Header,
}
37
38impl<R: Read> ConcatenatedTzif<R> {
39    /// Open the concatenated TZif file using the reader given.
40    ///
41    /// This reads the header and will return an error if the header is
42    /// invalid.
43    pub(crate) fn open(rdr: R) -> Result<ConcatenatedTzif<R>, Error> {
44        let header = Header::read(&rdr)?;
45        Ok(ConcatenatedTzif { rdr, header })
46    }
47
48    /// Returns the version of this `tzdata` database.
49    pub(crate) fn version(&self) -> ArrayStr<5> {
50        self.header.version
51    }
52
53    /// Returns a `TimeZone` extracted from this concatenated TZif data.
54    ///
55    /// This is only successful if an index entry with the corresponding
56    /// IANA time zone identifier could be found.
57    ///
58    /// Callers must provide two scratch buffers that are used for temporary
59    /// allocation internally. Callers can create a new buffer for each call,
60    /// but it's likely faster to reuse them if possible.
61    ///
62    /// If a `TimeZone` is returned, it is guaranteed to have a present IANA
63    /// name (accessible via `TimeZone::iana_name`).
64    pub(crate) fn get(
65        &self,
66        query: &str,
67        scratch1: &mut Vec<u8>,
68        scratch2: &mut Vec<u8>,
69    ) -> Result<Option<TimeZone>, Error> {
70        scratch1.clear();
71        alloc(scratch1, self.header.index_len())?;
72        self.rdr
73            .read_exact_at(scratch1, self.header.index_offset)
74            .context("failed to read index block")?;
75
76        let mut index = &**scratch1;
77        while !index.is_empty() {
78            let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
79            index = &index[IndexEntry::LEN..];
80            let ordering = utf8::cmp_ignore_ascii_case_bytes(
81                entry.name_bytes(),
82                query.as_bytes(),
83            );
84            if ordering.is_ne() {
85                continue;
86            }
87
88            // OK because `entry.name_bytes()` is equal to `query`,
89            // ignoring ASCII case. The only way this can be true is is
90            // `entry.name_bytes()` is itself valid UTF-8.
91            let name = entry.name().unwrap();
92            scratch2.clear();
93            alloc(scratch2, entry.len())?;
94            let start = self.header.data_offset.saturating_add(entry.start());
95            self.rdr
96                .read_exact_at(scratch2, start)
97                .context("failed to read TZif data block")?;
98            return TimeZone::tzif(name, scratch2).map(Some);
99        }
100        Ok(None)
101    }
102
103    /// Returns a list of all IANA time zone identifiers in this concatenated
104    /// TZif data.
105    ///
106    /// Callers must provide a scratch buffer that is used for temporary
107    /// allocation internally. Callers can create a new buffer for each call,
108    /// but it's likely faster to reuse them if possible.
109    pub(crate) fn available(
110        &self,
111        scratch: &mut Vec<u8>,
112    ) -> Result<Vec<String>, Error> {
113        scratch.clear();
114        alloc(scratch, self.header.index_len())?;
115        self.rdr
116            .read_exact_at(scratch, self.header.index_offset)
117            .context("failed to read index block")?;
118
119        let names_len = self.header.index_len() / IndexEntry::LEN;
120        // Why are we careless with this alloc? Well, its size is proportional
121        // to the actual amount of data in the file. So the only way to get a
122        // big alloc is to create a huge file. This seems... fine... I guess.
123        // Where as the `alloc` above is done on the basis of an arbitrary
124        // 32-bit integer.
125        let mut names = Vec::with_capacity(names_len);
126        let mut index = &**scratch;
127        while !index.is_empty() {
128            let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
129            index = &index[IndexEntry::LEN..];
130            names.push(entry.name()?.to_string());
131        }
132        Ok(names)
133    }
134}
135
/// The header of Android concatenated TZif data.
///
/// The header has the version and some offsets indicating the location of
/// the index entry (a list of IANA time zone identifiers and offsets into
/// the data block) and the actual TZif data.
#[derive(Debug)]
struct Header {
    /// The 5-byte version string that follows the `tzdata` magic prefix.
    version: ArrayStr<5>,
    /// The offset at which the index block starts, decoded from a
    /// big-endian 32-bit integer in the header.
    index_offset: u64,
    /// The offset at which the TZif data block starts, decoded from a
    /// big-endian 32-bit integer in the header. Header parsing rejects
    /// values smaller than `index_offset`.
    data_offset: u64,
}
147
148impl Header {
149    /// Reads the header from Android's concatenated TZif concatenated data
150    /// file.
151    ///
152    /// Basically, this gives us the version and some offsets for where to find
153    /// data.
154    fn read<R: Read + ?Sized>(rdr: &R) -> Result<Header, Error> {
155        // 12 bytes plus 3 4-byte big endian integers.
156        let mut buf = [0; 12 + 3 * 4];
157        rdr.read_exact_at(&mut buf, 0)
158            .context("failed to read concatenated TZif header")?;
159        if &buf[..6] != b"tzdata" {
160            return Err(err!(
161                "expected first 6 bytes of concatenated TZif header \
162                 to be `tzdata`, but found `{found}`",
163                found = escape::Bytes(&buf[..6]),
164            ));
165        }
166        if buf[11] != 0 {
167            return Err(err!(
168                "expected last byte of concatenated TZif header \
169                 to be NUL, but found `{found}`",
170                found = escape::Bytes(&buf[..12]),
171            ));
172        }
173
174        let version = {
175            let version = core::str::from_utf8(&buf[6..11]).map_err(|_| {
176                err!(
177                    "expected version in concatenated TZif header to \
178                     be valid UTF-8, but found `{found}`",
179                    found = escape::Bytes(&buf[6..11]),
180                )
181            })?;
182            // OK because `version` is exactly 5 bytes, by construction.
183            ArrayStr::new(version).unwrap()
184        };
185        // OK because the sub-slice is sized to exactly 4 bytes.
186        let index_offset = u64::from(read_be32(&buf[12..16]));
187        // OK because the sub-slice is sized to exactly 4 bytes.
188        let data_offset = u64::from(read_be32(&buf[16..20]));
189        if index_offset > data_offset {
190            return Err(err!(
191                "invalid index ({index_offset}) and data ({data_offset}) \
192                 offsets, expected index offset to be less than or equal \
193                 to data offset",
194            ));
195        }
196        // we don't read 20..24 since we don't care about zonetab (yet)
197        let header = Header { version, index_offset, data_offset };
198        if header.index_len() % IndexEntry::LEN != 0 {
199            return Err(err!(
200                "length of index block is not a multiple {len}",
201                len = IndexEntry::LEN,
202            ));
203        }
204        Ok(header)
205    }
206
207    /// Returns the length of the index section of the concatenated tzdb.
208    ///
209    /// Beware of using this to create allocations. In theory, this should be
210    /// trusted data, but the length can be any 32-bit integer. If it's used to
211    /// create an allocation, it could potentially be up to 4GB.
212    fn index_len(&self) -> usize {
213        // OK because `Header` parsing returns an error if this overflows.
214        let len = self.data_offset.checked_sub(self.index_offset).unwrap();
215        // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
216        // which at the time of writing, is not supported by Jiff. Instead,
217        // a `usize::MAX` will trigger an allocation error.
218        usize::try_from(len).unwrap_or(usize::MAX)
219    }
220}
221
/// A view into a single index entry in the index block of concatenated TZif
/// data.
///
/// The wrapped slice holds the raw bytes of exactly one entry. The
/// constructor asserts that its length is `IndexEntry::LEN`, which is what
/// allows the accessors to slice it at fixed positions.
///
/// If we had safe transmute, it would be much nicer to define this as
///
/// ```text
/// #[derive(Clone, Copy)]
/// #[repr(transparent, align(1))]
/// struct IndexEntry {
///     name: [u8; 40],
///     start: u32,
///     len: u32,
///     _raw_utc_offset: u32, // we don't use this here
/// }
/// ```
///
/// And probably implement a trait asserting that this is plain old data (or
/// derive it safely). And then we could cast `&[u8]` to `&[IndexEntry]`
/// safely and access the individual fields as is. We could do this today,
/// but not in safe code. And since this isn't performance critical, it's just
/// not worth flagging this code as potentially containing undefined behavior.
#[derive(Clone, Copy)]
struct IndexEntry<'a>(&'a [u8]);
245
246impl<'a> IndexEntry<'a> {
247    /// The length of an index entry. It's fixed size. 40 bytes for the IANA
248    /// time zone identifier. 4 bytes for each of 3 big-endian integers. The
249    /// first is the start of the corresponding TZif data within the data
250    /// block. The second is the length of said TZif data. And the third is
251    /// the "raw UTC offset" of the time zone. (I'm unclear on the semantics
252    /// of this third, since some time zones have more than one because of
253    /// DST. And of course, it can change over time. Since I don't know what
254    /// Android uses this for, I'm not sure how I'm supposed to interpret it.)
255    const LEN: usize = 40 + 3 * 4;
256
257    /// Creates a new view into an entry in the concatenated TZif index.
258    ///
259    /// # Panics
260    ///
261    /// When `slice` does not have the expected length (`IndexEntry::LEN`).
262    fn new(slice: &'a [u8]) -> IndexEntry<'a> {
263        assert_eq!(slice.len(), IndexEntry::LEN, "invalid index entry length");
264        IndexEntry(slice)
265    }
266
267    /// Like `name_bytes`, but as a `&str`.
268    ///
269    /// This returns an error if the name isn't valid UTF-8.
270    fn name(&self) -> Result<&str, Error> {
271        core::str::from_utf8(self.name_bytes()).map_err(|_| {
272            err!(
273                "IANA time zone identifier `{name}` is not valid UTF-8",
274                name = escape::Bytes(self.name_bytes()),
275            )
276        })
277    }
278
279    /// Returns the IANA time zone identifier as a byte slice.
280    ///
281    /// In theory, an empty slice could be returned. But if that happens,
282    /// then there is probably a bug in this code somewhere, the format
283    /// changed or the source data is corrupt somehow.
284    fn name_bytes(&self) -> &'a [u8] {
285        let mut block = &self.0[..40];
286        while block.last().copied() == Some(0) {
287            block = &block[..block.len() - 1];
288        }
289        block
290    }
291
292    /// Returns the starting offset (relative to the beginning of the TZif
293    /// data block) of the corresponding TZif data.
294    fn start(&self) -> u64 {
295        u64::from(read_be32(&self.0[40..44]))
296    }
297
298    /// Returns the length of the TZif data block.
299    ///
300    /// Beware of using this to create allocations. In theory, this should be
301    /// trusted data, but the length can be any 32-bit integer. If it's used to
302    /// create an allocation, it could potentially be up to 4GB.
303    fn len(&self) -> usize {
304        // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
305        // which at the time of writing, is not supported by Jiff. Instead,
306        // a `usize::MAX` will trigger an allocation error.
307        usize::try_from(read_be32(&self.0[44..48])).unwrap_or(usize::MAX)
308    }
309}
310
311impl<'a> core::fmt::Debug for IndexEntry<'a> {
312    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
313        f.debug_struct("IndexEntry")
314            .field("name", &escape::Bytes(self.name_bytes()))
315            .field("start", &self.start())
316            .field("len", &self.len())
317            .finish()
318    }
319}
320
/// A crate-internal trait defining the source of concatenated TZif data.
///
/// Basically, this just provides a way to read a fixed amount of data at a
/// particular offset. This is obviously trivial to implement on `&[u8]` (and
/// indeed, we do so for testing), but we use it to abstract over platform
/// differences when reading from a `File`.
///
/// The intent is that on Unix, this will use `pread`, which avoids a file
/// seek followed by a `read` call.
pub(crate) trait Read {
    /// Reads exactly `buf.len()` bytes into `buf`, starting at `offset`
    /// bytes from the beginning of the source.
    ///
    /// An error is returned when `buf` cannot be filled completely.
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error>;
}
333
334impl<'a, R: Read + ?Sized> Read for &'a R {
335    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
336        (**self).read_exact_at(buf, offset)
337    }
338}
339
/// Decodes `bytes` as a big-endian 32-bit unsigned integer.
///
/// # Panics
///
/// When `bytes` is not exactly 4 bytes long.
fn read_be32(bytes: &[u8]) -> u32 {
    let array: [u8; 4] = bytes.try_into().expect("slice of length 4");
    u32::from_be_bytes(array)
}
348
/// An in-memory reader, used for testing, that copies bytes out of the
/// slice itself.
#[cfg(test)]
impl Read for [u8] {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        let start = match usize::try_from(offset) {
            Ok(start) => start,
            Err(_) => {
                return Err(err!("offset `{offset}` overflowed `usize`"));
            }
        };
        let wanted = buf.len();
        match self.get(start..) {
            // The offset points past the end of the slice.
            None => Err(err!(
                "given offset `{offset}` is not valid \
                 (only {len} bytes are available)",
                len = self.len(),
            )),
            // The offset is fine, but too few bytes follow it.
            Some(tail) if wanted > tail.len() => Err(err!(
                "unexpected EOF, expected {len} bytes but only have {have}",
                len = wanted,
                have = tail.len()
            )),
            Some(tail) => {
                buf.copy_from_slice(&tail[..wanted]);
                Ok(())
            }
        }
    }
}
372
/// On Unix, positional reads are delegated to the standard library's
/// `FileExt::read_exact_at`.
#[cfg(all(feature = "std", unix))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::os::unix::fs::FileExt;
        match FileExt::read_exact_at(self, buf, offset) {
            Ok(()) => Ok(()),
            Err(io_err) => Err(Error::io(io_err)),
        }
    }
}
380
/// On Windows, `seek_read` is called repeatedly until `buf` is full, since
/// a single call may read fewer bytes than requested.
#[cfg(all(feature = "std", windows))]
impl Read for std::fs::File {
    fn read_exact_at(
        &self,
        mut buf: &mut [u8],
        mut offset: u64,
    ) -> Result<(), Error> {
        use std::{io, os::windows::fs::FileExt};

        while !buf.is_empty() {
            let nread = match self.seek_read(buf, offset) {
                // Nothing more to read; whether that is an error is
                // decided after the loop.
                Ok(0) => break,
                Ok(n) => n,
                // Interrupted reads are simply retried.
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
                    continue
                }
                Err(e) => return Err(Error::io(e)),
            };
            buf = &mut buf[nread..];
            let advanced = u64::try_from(nread)
                .ok()
                .and_then(|n| n.checked_add(offset));
            offset = match advanced {
                Some(next) => next,
                None => {
                    return Err(err!(
                        "offset overflow when reading from `File`"
                    ));
                }
            };
        }
        if buf.is_empty() {
            return Ok(());
        }
        Err(Error::io(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        )))
    }
}
416
/// On platforms that are neither Unix nor Windows, a positional read is
/// done as a seek followed by a regular `read_exact`.
#[cfg(all(feature = "std", all(not(unix), not(windows))))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::io::{Read as _, Seek as _, SeekFrom};
        // The I/O traits are implemented for `&File`, so a mutable binding
        // of the shared reference is all that is needed here.
        let mut handle = self;
        let seeked = handle.seek(SeekFrom::Start(offset)).map_err(Error::io);
        seeked.with_context(|| {
            err!("failed to seek to offset {offset} in `File`")
        })?;
        handle.read_exact(buf).map_err(Error::io)
    }
}
428
429/// Allocates `additional` extra bytes on the `Vec` given and set them to `0`.
430///
431/// This specifically will never do an "OOM panic" and will instead return an
432/// error (courtesy of `Vec::try_reserve_exact`). It will also return an error
433/// without even trying the allocation if it's deemed to be "too big."
434///
435/// This is used so that we are extra careful about creating allocations based
436/// on integers parsed from concatenated TZif data. Generally speaking, the
437/// data we parse should be "trusted" (since it's probably not writable by
438/// anyone other than `root`), but who knows where this code will ultimately be
439/// used. So we try pretty hard to avoid panicking (even for OOM).
440///
441/// To be clear, we probably could panic on the error path. The goal here
442/// isn't to avoid OOM because you can't allocate 10 bytes---Jiff isn't robust
443/// enough in that kind of environment by far. The goal is to avoid OOM for
444/// exorbitantly large allocations through some kind of attack vector.
445fn alloc(bytes: &mut Vec<u8>, additional: usize) -> Result<(), Error> {
446    // At time of writing, the biggest TZif data file is a few KB. And the
447    // index block is tens of KB. So impose a limit that is a couple of orders
448    // of magnitude bigger, but still overall pretty small for... some systems.
449    // Anyway, I welcome improvements to this heuristic!
450    const LIMIT: usize = 10 * 1 << 20;
451
452    if additional > LIMIT {
453        return Err(err!(
454            "attempted to allocate more than {LIMIT} bytes \
455             while reading concatenated TZif data, which \
456             exceeds a heuristic limit to prevent huge allocations \
457             (please file a bug if this error is inappropriate)",
458        ));
459    }
460    bytes.try_reserve_exact(additional).map_err(|_| {
461        err!(
462            "failed to allocation {additional} bytes \
463             for reading concatenated TZif data"
464        )
465    })?;
466    // This... can't actually happen right?
467    let new_len = bytes
468        .len()
469        .checked_add(additional)
470        .ok_or_else(|| err!("total allocation length overflowed `usize`"))?;
471    bytes.resize(new_len, 0);
472    Ok(())
473}
474
475#[cfg(test)]
476mod tests {
477    use crate::{
478        civil::date,
479        tz::{
480            offset, testdata::ANDROID_CONCATENATED_TZIF, AmbiguousOffset,
481            Offset,
482        },
483        Timestamp,
484    };
485
486    use super::*;
487
488    fn unambiguous(offset_hours: i8) -> AmbiguousOffset {
489        let offset = offset(offset_hours);
490        o_unambiguous(offset)
491    }
492
493    fn gap(
494        earlier_offset_hours: i8,
495        later_offset_hours: i8,
496    ) -> AmbiguousOffset {
497        let earlier = offset(earlier_offset_hours);
498        let later = offset(later_offset_hours);
499        o_gap(earlier, later)
500    }
501
502    fn fold(
503        earlier_offset_hours: i8,
504        later_offset_hours: i8,
505    ) -> AmbiguousOffset {
506        let earlier = offset(earlier_offset_hours);
507        let later = offset(later_offset_hours);
508        o_fold(earlier, later)
509    }
510
    /// Wraps `offset` in `AmbiguousOffset::Unambiguous`.
    fn o_unambiguous(offset: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Unambiguous { offset }
    }
514
    /// Builds an `AmbiguousOffset::Gap` with `earlier` as the offset before
    /// the gap and `later` as the offset after it.
    fn o_gap(earlier: Offset, later: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Gap { before: earlier, after: later }
    }
518
    /// Builds an `AmbiguousOffset::Fold` with `earlier` as the offset before
    /// the fold and `later` as the offset after it.
    fn o_fold(earlier: Offset, later: Offset) -> AmbiguousOffset {
        AmbiguousOffset::Fold { before: earlier, after: later }
    }
522
523    // Copied from src/tz/mod.rs.
524    #[test]
525    fn time_zone_tzif_to_ambiguous_timestamp() {
526        let tests: &[(&str, &[_])] = &[
527            (
528                "America/New_York",
529                &[
530                    ((1969, 12, 31, 19, 0, 0, 0), unambiguous(-5)),
531                    ((2024, 3, 10, 1, 59, 59, 999_999_999), unambiguous(-5)),
532                    ((2024, 3, 10, 2, 0, 0, 0), gap(-5, -4)),
533                    ((2024, 3, 10, 2, 59, 59, 999_999_999), gap(-5, -4)),
534                    ((2024, 3, 10, 3, 0, 0, 0), unambiguous(-4)),
535                    ((2024, 11, 3, 0, 59, 59, 999_999_999), unambiguous(-4)),
536                    ((2024, 11, 3, 1, 0, 0, 0), fold(-4, -5)),
537                    ((2024, 11, 3, 1, 59, 59, 999_999_999), fold(-4, -5)),
538                    ((2024, 11, 3, 2, 0, 0, 0), unambiguous(-5)),
539                ],
540            ),
541            (
542                "Europe/Dublin",
543                &[
544                    ((1970, 1, 1, 0, 0, 0, 0), unambiguous(1)),
545                    ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
546                    ((2024, 3, 31, 1, 0, 0, 0), gap(0, 1)),
547                    ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 1)),
548                    ((2024, 3, 31, 2, 0, 0, 0), unambiguous(1)),
549                    ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(1)),
550                    ((2024, 10, 27, 1, 0, 0, 0), fold(1, 0)),
551                    ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(1, 0)),
552                    ((2024, 10, 27, 2, 0, 0, 0), unambiguous(0)),
553                ],
554            ),
555            (
556                "Australia/Tasmania",
557                &[
558                    ((1970, 1, 1, 11, 0, 0, 0), unambiguous(11)),
559                    ((2024, 4, 7, 1, 59, 59, 999_999_999), unambiguous(11)),
560                    ((2024, 4, 7, 2, 0, 0, 0), fold(11, 10)),
561                    ((2024, 4, 7, 2, 59, 59, 999_999_999), fold(11, 10)),
562                    ((2024, 4, 7, 3, 0, 0, 0), unambiguous(10)),
563                    ((2024, 10, 6, 1, 59, 59, 999_999_999), unambiguous(10)),
564                    ((2024, 10, 6, 2, 0, 0, 0), gap(10, 11)),
565                    ((2024, 10, 6, 2, 59, 59, 999_999_999), gap(10, 11)),
566                    ((2024, 10, 6, 3, 0, 0, 0), unambiguous(11)),
567                ],
568            ),
569            (
570                "Antarctica/Troll",
571                &[
572                    ((1970, 1, 1, 0, 0, 0, 0), unambiguous(0)),
573                    // test the gap
574                    ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
575                    ((2024, 3, 31, 1, 0, 0, 0), gap(0, 2)),
576                    ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 2)),
577                    // still in the gap!
578                    ((2024, 3, 31, 2, 0, 0, 0), gap(0, 2)),
579                    ((2024, 3, 31, 2, 59, 59, 999_999_999), gap(0, 2)),
580                    // finally out
581                    ((2024, 3, 31, 3, 0, 0, 0), unambiguous(2)),
582                    // test the fold
583                    ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(2)),
584                    ((2024, 10, 27, 1, 0, 0, 0), fold(2, 0)),
585                    ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(2, 0)),
586                    // still in the fold!
587                    ((2024, 10, 27, 2, 0, 0, 0), fold(2, 0)),
588                    ((2024, 10, 27, 2, 59, 59, 999_999_999), fold(2, 0)),
589                    // finally out
590                    ((2024, 10, 27, 3, 0, 0, 0), unambiguous(0)),
591                ],
592            ),
593            (
594                "America/St_Johns",
595                &[
596                    (
597                        (1969, 12, 31, 20, 30, 0, 0),
598                        o_unambiguous(-Offset::hms(3, 30, 0)),
599                    ),
600                    (
601                        (2024, 3, 10, 1, 59, 59, 999_999_999),
602                        o_unambiguous(-Offset::hms(3, 30, 0)),
603                    ),
604                    (
605                        (2024, 3, 10, 2, 0, 0, 0),
606                        o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
607                    ),
608                    (
609                        (2024, 3, 10, 2, 59, 59, 999_999_999),
610                        o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
611                    ),
612                    (
613                        (2024, 3, 10, 3, 0, 0, 0),
614                        o_unambiguous(-Offset::hms(2, 30, 0)),
615                    ),
616                    (
617                        (2024, 11, 3, 0, 59, 59, 999_999_999),
618                        o_unambiguous(-Offset::hms(2, 30, 0)),
619                    ),
620                    (
621                        (2024, 11, 3, 1, 0, 0, 0),
622                        o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
623                    ),
624                    (
625                        (2024, 11, 3, 1, 59, 59, 999_999_999),
626                        o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
627                    ),
628                    (
629                        (2024, 11, 3, 2, 0, 0, 0),
630                        o_unambiguous(-Offset::hms(3, 30, 0)),
631                    ),
632                ],
633            ),
634            // This time zone has an interesting transition where it jumps
635            // backwards a full day at 1867-10-19T15:30:00.
636            (
637                "America/Sitka",
638                &[
639                    ((1969, 12, 31, 16, 0, 0, 0), unambiguous(-8)),
640                    (
641                        (-9999, 1, 2, 16, 58, 46, 0),
642                        o_unambiguous(Offset::hms(14, 58, 47)),
643                    ),
644                    (
645                        (1867, 10, 18, 15, 29, 59, 0),
646                        o_unambiguous(Offset::hms(14, 58, 47)),
647                    ),
648                    (
649                        (1867, 10, 18, 15, 30, 0, 0),
650                        // A fold of 24 hours!!!
651                        o_fold(
652                            Offset::hms(14, 58, 47),
653                            -Offset::hms(9, 1, 13),
654                        ),
655                    ),
656                    (
657                        (1867, 10, 19, 15, 29, 59, 999_999_999),
658                        // Still in the fold...
659                        o_fold(
660                            Offset::hms(14, 58, 47),
661                            -Offset::hms(9, 1, 13),
662                        ),
663                    ),
664                    (
665                        (1867, 10, 19, 15, 30, 0, 0),
666                        // Finally out.
667                        o_unambiguous(-Offset::hms(9, 1, 13)),
668                    ),
669                ],
670            ),
671            // As with to_datetime, we test every possible transition
672            // point here since this time zone has a small number of them.
673            (
674                "Pacific/Honolulu",
675                &[
676                    (
677                        (1896, 1, 13, 11, 59, 59, 0),
678                        o_unambiguous(-Offset::hms(10, 31, 26)),
679                    ),
680                    (
681                        (1896, 1, 13, 12, 0, 0, 0),
682                        o_gap(
683                            -Offset::hms(10, 31, 26),
684                            -Offset::hms(10, 30, 0),
685                        ),
686                    ),
687                    (
688                        (1896, 1, 13, 12, 1, 25, 0),
689                        o_gap(
690                            -Offset::hms(10, 31, 26),
691                            -Offset::hms(10, 30, 0),
692                        ),
693                    ),
694                    (
695                        (1896, 1, 13, 12, 1, 26, 0),
696                        o_unambiguous(-Offset::hms(10, 30, 0)),
697                    ),
698                    (
699                        (1933, 4, 30, 1, 59, 59, 0),
700                        o_unambiguous(-Offset::hms(10, 30, 0)),
701                    ),
702                    (
703                        (1933, 4, 30, 2, 0, 0, 0),
704                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
705                    ),
706                    (
707                        (1933, 4, 30, 2, 59, 59, 0),
708                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
709                    ),
710                    (
711                        (1933, 4, 30, 3, 0, 0, 0),
712                        o_unambiguous(-Offset::hms(9, 30, 0)),
713                    ),
714                    (
715                        (1933, 5, 21, 10, 59, 59, 0),
716                        o_unambiguous(-Offset::hms(9, 30, 0)),
717                    ),
718                    (
719                        (1933, 5, 21, 11, 0, 0, 0),
720                        o_fold(
721                            -Offset::hms(9, 30, 0),
722                            -Offset::hms(10, 30, 0),
723                        ),
724                    ),
725                    (
726                        (1933, 5, 21, 11, 59, 59, 0),
727                        o_fold(
728                            -Offset::hms(9, 30, 0),
729                            -Offset::hms(10, 30, 0),
730                        ),
731                    ),
732                    (
733                        (1933, 5, 21, 12, 0, 0, 0),
734                        o_unambiguous(-Offset::hms(10, 30, 0)),
735                    ),
736                    (
737                        (1942, 2, 9, 1, 59, 59, 0),
738                        o_unambiguous(-Offset::hms(10, 30, 0)),
739                    ),
740                    (
741                        (1942, 2, 9, 2, 0, 0, 0),
742                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
743                    ),
744                    (
745                        (1942, 2, 9, 2, 59, 59, 0),
746                        o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
747                    ),
748                    (
749                        (1942, 2, 9, 3, 0, 0, 0),
750                        o_unambiguous(-Offset::hms(9, 30, 0)),
751                    ),
752                    (
753                        (1945, 8, 14, 13, 29, 59, 0),
754                        o_unambiguous(-Offset::hms(9, 30, 0)),
755                    ),
756                    (
757                        (1945, 8, 14, 13, 30, 0, 0),
758                        o_unambiguous(-Offset::hms(9, 30, 0)),
759                    ),
760                    (
761                        (1945, 8, 14, 13, 30, 1, 0),
762                        o_unambiguous(-Offset::hms(9, 30, 0)),
763                    ),
764                    (
765                        (1945, 9, 30, 0, 59, 59, 0),
766                        o_unambiguous(-Offset::hms(9, 30, 0)),
767                    ),
768                    (
769                        (1945, 9, 30, 1, 0, 0, 0),
770                        o_fold(
771                            -Offset::hms(9, 30, 0),
772                            -Offset::hms(10, 30, 0),
773                        ),
774                    ),
775                    (
776                        (1945, 9, 30, 1, 59, 59, 0),
777                        o_fold(
778                            -Offset::hms(9, 30, 0),
779                            -Offset::hms(10, 30, 0),
780                        ),
781                    ),
782                    (
783                        (1945, 9, 30, 2, 0, 0, 0),
784                        o_unambiguous(-Offset::hms(10, 30, 0)),
785                    ),
786                    (
787                        (1947, 6, 8, 1, 59, 59, 0),
788                        o_unambiguous(-Offset::hms(10, 30, 0)),
789                    ),
790                    (
791                        (1947, 6, 8, 2, 0, 0, 0),
792                        o_gap(-Offset::hms(10, 30, 0), -offset(10)),
793                    ),
794                    (
795                        (1947, 6, 8, 2, 29, 59, 0),
796                        o_gap(-Offset::hms(10, 30, 0), -offset(10)),
797                    ),
798                    ((1947, 6, 8, 2, 30, 0, 0), unambiguous(-10)),
799                ],
800            ),
801        ];
802        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
803        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
804        for &(tzname, datetimes_to_ambiguous) in tests {
805            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
806            for &(datetime, ambiguous_kind) in datetimes_to_ambiguous {
807                let (year, month, day, hour, min, sec, nano) = datetime;
808                let dt = date(year, month, day).at(hour, min, sec, nano);
809                let got = tz.to_ambiguous_zoned(dt);
810                assert_eq!(
811                    got.offset(),
812                    ambiguous_kind,
813                    "\nTZ: {tzname}\ndatetime: \
814                     {year:04}-{month:02}-{day:02}T\
815                     {hour:02}:{min:02}:{sec:02}.{nano:09}",
816                );
817            }
818        }
819    }
820
821    // Copied from src/tz/mod.rs.
822    #[test]
823    fn time_zone_tzif_to_datetime() {
824        let o = |hours| offset(hours);
825        let tests: &[(&str, &[_])] = &[
826            (
827                "America/New_York",
828                &[
829                    ((0, 0), o(-5), "EST", (1969, 12, 31, 19, 0, 0, 0)),
830                    (
831                        (1710052200, 0),
832                        o(-5),
833                        "EST",
834                        (2024, 3, 10, 1, 30, 0, 0),
835                    ),
836                    (
837                        (1710053999, 999_999_999),
838                        o(-5),
839                        "EST",
840                        (2024, 3, 10, 1, 59, 59, 999_999_999),
841                    ),
842                    ((1710054000, 0), o(-4), "EDT", (2024, 3, 10, 3, 0, 0, 0)),
843                    (
844                        (1710055800, 0),
845                        o(-4),
846                        "EDT",
847                        (2024, 3, 10, 3, 30, 0, 0),
848                    ),
849                    ((1730610000, 0), o(-4), "EDT", (2024, 11, 3, 1, 0, 0, 0)),
850                    (
851                        (1730611800, 0),
852                        o(-4),
853                        "EDT",
854                        (2024, 11, 3, 1, 30, 0, 0),
855                    ),
856                    (
857                        (1730613599, 999_999_999),
858                        o(-4),
859                        "EDT",
860                        (2024, 11, 3, 1, 59, 59, 999_999_999),
861                    ),
862                    ((1730613600, 0), o(-5), "EST", (2024, 11, 3, 1, 0, 0, 0)),
863                    (
864                        (1730615400, 0),
865                        o(-5),
866                        "EST",
867                        (2024, 11, 3, 1, 30, 0, 0),
868                    ),
869                ],
870            ),
871            (
872                "Australia/Tasmania",
873                &[
874                    ((0, 0), o(11), "AEDT", (1970, 1, 1, 11, 0, 0, 0)),
875                    (
876                        (1728142200, 0),
877                        o(10),
878                        "AEST",
879                        (2024, 10, 6, 1, 30, 0, 0),
880                    ),
881                    (
882                        (1728143999, 999_999_999),
883                        o(10),
884                        "AEST",
885                        (2024, 10, 6, 1, 59, 59, 999_999_999),
886                    ),
887                    (
888                        (1728144000, 0),
889                        o(11),
890                        "AEDT",
891                        (2024, 10, 6, 3, 0, 0, 0),
892                    ),
893                    (
894                        (1728145800, 0),
895                        o(11),
896                        "AEDT",
897                        (2024, 10, 6, 3, 30, 0, 0),
898                    ),
899                    ((1712415600, 0), o(11), "AEDT", (2024, 4, 7, 2, 0, 0, 0)),
900                    (
901                        (1712417400, 0),
902                        o(11),
903                        "AEDT",
904                        (2024, 4, 7, 2, 30, 0, 0),
905                    ),
906                    (
907                        (1712419199, 999_999_999),
908                        o(11),
909                        "AEDT",
910                        (2024, 4, 7, 2, 59, 59, 999_999_999),
911                    ),
912                    ((1712419200, 0), o(10), "AEST", (2024, 4, 7, 2, 0, 0, 0)),
913                    (
914                        (1712421000, 0),
915                        o(10),
916                        "AEST",
917                        (2024, 4, 7, 2, 30, 0, 0),
918                    ),
919                ],
920            ),
921            // Pacific/Honolulu is small eough that we just test every
922            // possible instant before, at and after each transition.
923            (
924                "Pacific/Honolulu",
925                &[
926                    (
927                        (-2334101315, 0),
928                        -Offset::hms(10, 31, 26),
929                        "LMT",
930                        (1896, 1, 13, 11, 59, 59, 0),
931                    ),
932                    (
933                        (-2334101314, 0),
934                        -Offset::hms(10, 30, 0),
935                        "HST",
936                        (1896, 1, 13, 12, 1, 26, 0),
937                    ),
938                    (
939                        (-2334101313, 0),
940                        -Offset::hms(10, 30, 0),
941                        "HST",
942                        (1896, 1, 13, 12, 1, 27, 0),
943                    ),
944                    (
945                        (-1157283001, 0),
946                        -Offset::hms(10, 30, 0),
947                        "HST",
948                        (1933, 4, 30, 1, 59, 59, 0),
949                    ),
950                    (
951                        (-1157283000, 0),
952                        -Offset::hms(9, 30, 0),
953                        "HDT",
954                        (1933, 4, 30, 3, 0, 0, 0),
955                    ),
956                    (
957                        (-1157282999, 0),
958                        -Offset::hms(9, 30, 0),
959                        "HDT",
960                        (1933, 4, 30, 3, 0, 1, 0),
961                    ),
962                    (
963                        (-1155436201, 0),
964                        -Offset::hms(9, 30, 0),
965                        "HDT",
966                        (1933, 5, 21, 11, 59, 59, 0),
967                    ),
968                    (
969                        (-1155436200, 0),
970                        -Offset::hms(10, 30, 0),
971                        "HST",
972                        (1933, 5, 21, 11, 0, 0, 0),
973                    ),
974                    (
975                        (-1155436199, 0),
976                        -Offset::hms(10, 30, 0),
977                        "HST",
978                        (1933, 5, 21, 11, 0, 1, 0),
979                    ),
980                    (
981                        (-880198201, 0),
982                        -Offset::hms(10, 30, 0),
983                        "HST",
984                        (1942, 2, 9, 1, 59, 59, 0),
985                    ),
986                    (
987                        (-880198200, 0),
988                        -Offset::hms(9, 30, 0),
989                        "HWT",
990                        (1942, 2, 9, 3, 0, 0, 0),
991                    ),
992                    (
993                        (-880198199, 0),
994                        -Offset::hms(9, 30, 0),
995                        "HWT",
996                        (1942, 2, 9, 3, 0, 1, 0),
997                    ),
998                    (
999                        (-769395601, 0),
1000                        -Offset::hms(9, 30, 0),
1001                        "HWT",
1002                        (1945, 8, 14, 13, 29, 59, 0),
1003                    ),
1004                    (
1005                        (-769395600, 0),
1006                        -Offset::hms(9, 30, 0),
1007                        "HPT",
1008                        (1945, 8, 14, 13, 30, 0, 0),
1009                    ),
1010                    (
1011                        (-769395599, 0),
1012                        -Offset::hms(9, 30, 0),
1013                        "HPT",
1014                        (1945, 8, 14, 13, 30, 1, 0),
1015                    ),
1016                    (
1017                        (-765376201, 0),
1018                        -Offset::hms(9, 30, 0),
1019                        "HPT",
1020                        (1945, 9, 30, 1, 59, 59, 0),
1021                    ),
1022                    (
1023                        (-765376200, 0),
1024                        -Offset::hms(10, 30, 0),
1025                        "HST",
1026                        (1945, 9, 30, 1, 0, 0, 0),
1027                    ),
1028                    (
1029                        (-765376199, 0),
1030                        -Offset::hms(10, 30, 0),
1031                        "HST",
1032                        (1945, 9, 30, 1, 0, 1, 0),
1033                    ),
1034                    (
1035                        (-712150201, 0),
1036                        -Offset::hms(10, 30, 0),
1037                        "HST",
1038                        (1947, 6, 8, 1, 59, 59, 0),
1039                    ),
1040                    // At this point, we hit the last transition and the POSIX
1041                    // TZ string takes over.
1042                    (
1043                        (-712150200, 0),
1044                        -Offset::hms(10, 0, 0),
1045                        "HST",
1046                        (1947, 6, 8, 2, 30, 0, 0),
1047                    ),
1048                    (
1049                        (-712150199, 0),
1050                        -Offset::hms(10, 0, 0),
1051                        "HST",
1052                        (1947, 6, 8, 2, 30, 1, 0),
1053                    ),
1054                ],
1055            ),
1056            // This time zone has an interesting transition where it jumps
1057            // backwards a full day at 1867-10-19T15:30:00.
1058            (
1059                "America/Sitka",
1060                &[
1061                    ((0, 0), o(-8), "PST", (1969, 12, 31, 16, 0, 0, 0)),
1062                    (
1063                        (-377705023201, 0),
1064                        Offset::hms(14, 58, 47),
1065                        "LMT",
1066                        (-9999, 1, 2, 16, 58, 46, 0),
1067                    ),
1068                    (
1069                        (-3225223728, 0),
1070                        Offset::hms(14, 58, 47),
1071                        "LMT",
1072                        (1867, 10, 19, 15, 29, 59, 0),
1073                    ),
1074                    // Notice the 24 hour time jump backwards a whole day!
1075                    (
1076                        (-3225223727, 0),
1077                        -Offset::hms(9, 1, 13),
1078                        "LMT",
1079                        (1867, 10, 18, 15, 30, 0, 0),
1080                    ),
1081                    (
1082                        (-3225223726, 0),
1083                        -Offset::hms(9, 1, 13),
1084                        "LMT",
1085                        (1867, 10, 18, 15, 30, 1, 0),
1086                    ),
1087                ],
1088            ),
1089        ];
1090        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
1091        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
1092        for &(tzname, timestamps_to_datetimes) in tests {
1093            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
1094            for &((unix_sec, unix_nano), offset, abbrev, datetime) in
1095                timestamps_to_datetimes
1096            {
1097                let (year, month, day, hour, min, sec, nano) = datetime;
1098                let timestamp = Timestamp::new(unix_sec, unix_nano).unwrap();
1099                let info = tz.to_offset_info(timestamp);
1100                assert_eq!(
1101                    info.offset(),
1102                    offset,
1103                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
1104                );
1105                assert_eq!(
1106                    info.abbreviation(),
1107                    abbrev,
1108                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
1109                );
1110                assert_eq!(
1111                    info.offset().to_datetime(timestamp),
1112                    date(year, month, day).at(hour, min, sec, nano),
1113                    "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
1114                );
1115            }
1116        }
1117    }
1118
1119    #[test]
1120    #[cfg(not(miri))]
1121    fn read_all_time_zones() {
1122        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
1123        let available = db.available(&mut alloc::vec![]).unwrap();
1124        let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]);
1125        for tzname in available.iter() {
1126            let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap();
1127            assert_eq!(tzname, tz.iana_name().unwrap());
1128        }
1129    }
1130
1131    #[test]
1132    fn available_len() {
1133        let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
1134        let available = db.available(&mut alloc::vec![]).unwrap();
1135        assert_eq!(596, available.len());
1136        for window in available.windows(2) {
1137            let (x1, x2) = (&window[0], &window[1]);
1138            assert!(x1 < x2, "{x1} is not less than {x2}");
1139        }
1140    }
1141}
jiff/tz/concatenated.rs

jiff/tz/
concatenated.rs