jiff/tz/concatenated.rs
1use alloc::{
2 string::{String, ToString},
3 vec::Vec,
4};
5
6use crate::{
7 error::{err, Error, ErrorContext},
8 tz::TimeZone,
9 util::{array_str::ArrayStr, escape, utf8},
10};
11
12/// An abstraction for reading data from Android's concatenated TZif data file.
13///
14/// This abstraction is designed in a way that the data is reads from is
15/// largely untrusted. This means that, no matter what sequence of bytes is
16/// given, this should never panic (or else there is a bug). Moreover, there is
17/// some guarding against disproportionate allocation. While big allocations
18/// can still happen, they require a proportionally large data file. (Thus,
19/// callers can guard against this by considering the size of the data.) What
20/// this implementation prevents against is accidentally OOMing or panicking as
21/// a result of naively doing `Vec::with_capacity(rdr.decode_integer())`.
22///
23/// This is also designed to work in alloc-only contexts mostly out of "good
24/// sense." Technically we don't (currently) use this outside of `std`, since
25/// it's only used for reading tzdb on Android from the file system. But we do
26/// things this way in case we end up wanting to use it for something else.
27/// If we needed this for no-alloc environments, then that's a much bigger
28/// change, if only because it would require making the TZif parser no-alloc
29/// compatible, and it's not quite clear what the best way to do that is. We
30/// achieve the alloc-only API be introducing a trait that abstracts over a
31/// `File` for random access to bytes.
32#[derive(Debug)]
33pub(crate) struct ConcatenatedTzif<R> {
34 rdr: R,
35 header: Header,
36}
37
38impl<R: Read> ConcatenatedTzif<R> {
39 /// Open the concatenated TZif file using the reader given.
40 ///
41 /// This reads the header and will return an error if the header is
42 /// invalid.
43 pub(crate) fn open(rdr: R) -> Result<ConcatenatedTzif<R>, Error> {
44 let header = Header::read(&rdr)?;
45 Ok(ConcatenatedTzif { rdr, header })
46 }
47
48 /// Returns the version of this `tzdata` database.
49 pub(crate) fn version(&self) -> ArrayStr<5> {
50 self.header.version
51 }
52
53 /// Returns a `TimeZone` extracted from this concatenated TZif data.
54 ///
55 /// This is only successful if an index entry with the corresponding
56 /// IANA time zone identifier could be found.
57 ///
58 /// Callers must provide two scratch buffers that are used for temporary
59 /// allocation internally. Callers can create a new buffer for each call,
60 /// but it's likely faster to reuse them if possible.
61 ///
62 /// If a `TimeZone` is returned, it is guaranteed to have a present IANA
63 /// name (accessible via `TimeZone::iana_name`).
64 pub(crate) fn get(
65 &self,
66 query: &str,
67 scratch1: &mut Vec<u8>,
68 scratch2: &mut Vec<u8>,
69 ) -> Result<Option<TimeZone>, Error> {
70 scratch1.clear();
71 alloc(scratch1, self.header.index_len())?;
72 self.rdr
73 .read_exact_at(scratch1, self.header.index_offset)
74 .context("failed to read index block")?;
75
76 let mut index = &**scratch1;
77 while !index.is_empty() {
78 let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
79 index = &index[IndexEntry::LEN..];
80 let ordering = utf8::cmp_ignore_ascii_case_bytes(
81 entry.name_bytes(),
82 query.as_bytes(),
83 );
84 if ordering.is_ne() {
85 continue;
86 }
87
88 // OK because `entry.name_bytes()` is equal to `query`,
89 // ignoring ASCII case. The only way this can be true is is
90 // `entry.name_bytes()` is itself valid UTF-8.
91 let name = entry.name().unwrap();
92 scratch2.clear();
93 alloc(scratch2, entry.len())?;
94 let start = self.header.data_offset.saturating_add(entry.start());
95 self.rdr
96 .read_exact_at(scratch2, start)
97 .context("failed to read TZif data block")?;
98 return TimeZone::tzif(name, scratch2).map(Some);
99 }
100 Ok(None)
101 }
102
103 /// Returns a list of all IANA time zone identifiers in this concatenated
104 /// TZif data.
105 ///
106 /// Callers must provide a scratch buffer that is used for temporary
107 /// allocation internally. Callers can create a new buffer for each call,
108 /// but it's likely faster to reuse them if possible.
109 pub(crate) fn available(
110 &self,
111 scratch: &mut Vec<u8>,
112 ) -> Result<Vec<String>, Error> {
113 scratch.clear();
114 alloc(scratch, self.header.index_len())?;
115 self.rdr
116 .read_exact_at(scratch, self.header.index_offset)
117 .context("failed to read index block")?;
118
119 let names_len = self.header.index_len() / IndexEntry::LEN;
120 // Why are we careless with this alloc? Well, its size is proportional
121 // to the actual amount of data in the file. So the only way to get a
122 // big alloc is to create a huge file. This seems... fine... I guess.
123 // Where as the `alloc` above is done on the basis of an arbitrary
124 // 32-bit integer.
125 let mut names = Vec::with_capacity(names_len);
126 let mut index = &**scratch;
127 while !index.is_empty() {
128 let entry = IndexEntry::new(&index[..IndexEntry::LEN]);
129 index = &index[IndexEntry::LEN..];
130 names.push(entry.name()?.to_string());
131 }
132 Ok(names)
133 }
134}
135
136/// The header of Android concatenated TZif data.
137///
138/// The header has the version and some offsets indicating the location of
139/// the index entry (a list of IANA time zone identifiers and offsets into
140/// the data block) and the actual TZif data.
141#[derive(Debug)]
142struct Header {
143 version: ArrayStr<5>,
144 index_offset: u64,
145 data_offset: u64,
146}
147
148impl Header {
149 /// Reads the header from Android's concatenated TZif concatenated data
150 /// file.
151 ///
152 /// Basically, this gives us the version and some offsets for where to find
153 /// data.
154 fn read<R: Read + ?Sized>(rdr: &R) -> Result<Header, Error> {
155 // 12 bytes plus 3 4-byte big endian integers.
156 let mut buf = [0; 12 + 3 * 4];
157 rdr.read_exact_at(&mut buf, 0)
158 .context("failed to read concatenated TZif header")?;
159 if &buf[..6] != b"tzdata" {
160 return Err(err!(
161 "expected first 6 bytes of concatenated TZif header \
162 to be `tzdata`, but found `{found}`",
163 found = escape::Bytes(&buf[..6]),
164 ));
165 }
166 if buf[11] != 0 {
167 return Err(err!(
168 "expected last byte of concatenated TZif header \
169 to be NUL, but found `{found}`",
170 found = escape::Bytes(&buf[..12]),
171 ));
172 }
173
174 let version = {
175 let version = core::str::from_utf8(&buf[6..11]).map_err(|_| {
176 err!(
177 "expected version in concatenated TZif header to \
178 be valid UTF-8, but found `{found}`",
179 found = escape::Bytes(&buf[6..11]),
180 )
181 })?;
182 // OK because `version` is exactly 5 bytes, by construction.
183 ArrayStr::new(version).unwrap()
184 };
185 // OK because the sub-slice is sized to exactly 4 bytes.
186 let index_offset = u64::from(read_be32(&buf[12..16]));
187 // OK because the sub-slice is sized to exactly 4 bytes.
188 let data_offset = u64::from(read_be32(&buf[16..20]));
189 if index_offset > data_offset {
190 return Err(err!(
191 "invalid index ({index_offset}) and data ({data_offset}) \
192 offsets, expected index offset to be less than or equal \
193 to data offset",
194 ));
195 }
196 // we don't read 20..24 since we don't care about zonetab (yet)
197 let header = Header { version, index_offset, data_offset };
198 if header.index_len() % IndexEntry::LEN != 0 {
199 return Err(err!(
200 "length of index block is not a multiple {len}",
201 len = IndexEntry::LEN,
202 ));
203 }
204 Ok(header)
205 }
206
207 /// Returns the length of the index section of the concatenated tzdb.
208 ///
209 /// Beware of using this to create allocations. In theory, this should be
210 /// trusted data, but the length can be any 32-bit integer. If it's used to
211 /// create an allocation, it could potentially be up to 4GB.
212 fn index_len(&self) -> usize {
213 // OK because `Header` parsing returns an error if this overflows.
214 let len = self.data_offset.checked_sub(self.index_offset).unwrap();
215 // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
216 // which at the time of writing, is not supported by Jiff. Instead,
217 // a `usize::MAX` will trigger an allocation error.
218 usize::try_from(len).unwrap_or(usize::MAX)
219 }
220}
221
/// A borrowed view of one entry in the index block of concatenated TZif
/// data.
///
/// With safe transmute, a much nicer definition would be something like
///
/// ```text
/// #[derive(Clone, Copy)]
/// #[repr(transparent, align(1))]
/// struct IndexEntry {
///   name: [u8; 40],
///   start: u32,
///   len: u32,
///   _raw_utc_offset: u32, // we don't use this here
/// }
/// ```
///
/// probably along with a trait (implemented or safely derived) asserting
/// that this is plain old data. A `&[u8]` could then be cast to a
/// `&[IndexEntry]` safely and the fields accessed directly. That is possible
/// today, but not in safe code. Since none of this is performance critical,
/// it isn't worth flagging this code as potentially containing undefined
/// behavior. So the entry is instead a wrapper around the raw bytes, with
/// accessors that decode each field on demand.
#[derive(Clone, Copy)]
struct IndexEntry<'a>(&'a [u8]);
245
246impl<'a> IndexEntry<'a> {
247 /// The length of an index entry. It's fixed size. 40 bytes for the IANA
248 /// time zone identifier. 4 bytes for each of 3 big-endian integers. The
249 /// first is the start of the corresponding TZif data within the data
250 /// block. The second is the length of said TZif data. And the third is
251 /// the "raw UTC offset" of the time zone. (I'm unclear on the semantics
252 /// of this third, since some time zones have more than one because of
253 /// DST. And of course, it can change over time. Since I don't know what
254 /// Android uses this for, I'm not sure how I'm supposed to interpret it.)
255 const LEN: usize = 40 + 3 * 4;
256
257 /// Creates a new view into an entry in the concatenated TZif index.
258 ///
259 /// # Panics
260 ///
261 /// When `slice` does not have the expected length (`IndexEntry::LEN`).
262 fn new(slice: &'a [u8]) -> IndexEntry<'a> {
263 assert_eq!(slice.len(), IndexEntry::LEN, "invalid index entry length");
264 IndexEntry(slice)
265 }
266
267 /// Like `name_bytes`, but as a `&str`.
268 ///
269 /// This returns an error if the name isn't valid UTF-8.
270 fn name(&self) -> Result<&str, Error> {
271 core::str::from_utf8(self.name_bytes()).map_err(|_| {
272 err!(
273 "IANA time zone identifier `{name}` is not valid UTF-8",
274 name = escape::Bytes(self.name_bytes()),
275 )
276 })
277 }
278
279 /// Returns the IANA time zone identifier as a byte slice.
280 ///
281 /// In theory, an empty slice could be returned. But if that happens,
282 /// then there is probably a bug in this code somewhere, the format
283 /// changed or the source data is corrupt somehow.
284 fn name_bytes(&self) -> &'a [u8] {
285 let mut block = &self.0[..40];
286 while block.last().copied() == Some(0) {
287 block = &block[..block.len() - 1];
288 }
289 block
290 }
291
292 /// Returns the starting offset (relative to the beginning of the TZif
293 /// data block) of the corresponding TZif data.
294 fn start(&self) -> u64 {
295 u64::from(read_be32(&self.0[40..44]))
296 }
297
298 /// Returns the length of the TZif data block.
299 ///
300 /// Beware of using this to create allocations. In theory, this should be
301 /// trusted data, but the length can be any 32-bit integer. If it's used to
302 /// create an allocation, it could potentially be up to 4GB.
303 fn len(&self) -> usize {
304 // N.B. Overflow only occurs here on 16-bit (or smaller) platforms,
305 // which at the time of writing, is not supported by Jiff. Instead,
306 // a `usize::MAX` will trigger an allocation error.
307 usize::try_from(read_be32(&self.0[44..48])).unwrap_or(usize::MAX)
308 }
309}
310
311impl<'a> core::fmt::Debug for IndexEntry<'a> {
312 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
313 f.debug_struct("IndexEntry")
314 .field("name", &escape::Bytes(self.name_bytes()))
315 .field("start", &self.start())
316 .field("len", &self.len())
317 .finish()
318 }
319}
320
321/// A crate-internal trait defining the source of concatenated TZif data.
322///
323/// Basically, this just provides a way to read a fixed amount of data at a
324/// particular offset. This is obviously trivial to implement on `&[u8]` (and
325/// indeed, we do so for testing), but we use it to abstract over platform
326/// differences when reading from a `File`.
327///
328/// The intent is that on Unix, this will use `pread`, which avoids a file
329/// seek followed by a `read` call.
330pub(crate) trait Read {
331 fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error>;
332}
333
334impl<'a, R: Read + ?Sized> Read for &'a R {
335 fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
336 (**self).read_exact_at(buf, offset)
337 }
338}
339
/// Decodes `bytes` as a 32-bit integer in big endian byte order.
///
/// # Panics
///
/// If `bytes.len() != 4`.
fn read_be32(bytes: &[u8]) -> u32 {
    let quad = <[u8; 4]>::try_from(bytes).expect("slice of length 4");
    u32::from_be_bytes(quad)
}
348
/// An in-memory reader, only available in tests.
///
/// Reads are bounds checked: an offset past the end of the slice, or a
/// request for more bytes than remain after the offset, results in an error.
#[cfg(test)]
impl Read for [u8] {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        let start = usize::try_from(offset)
            .map_err(|_| err!("offset `{offset}` overflowed `usize`"))?;
        let available = self.get(start..).ok_or_else(|| {
            err!(
                "given offset `{offset}` is not valid \
                 (only {len} bytes are available)",
                len = self.len(),
            )
        })?;
        let Some(src) = available.get(..buf.len()) else {
            return Err(err!(
                "unexpected EOF, expected {len} bytes but only have {have}",
                len = buf.len(),
                have = available.len()
            ));
        };
        buf.copy_from_slice(src);
        Ok(())
    }
}
372
/// On Unix, reads are delegated to the standard library's positional
/// `FileExt::read_exact_at`.
#[cfg(all(feature = "std", unix))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::os::unix::fs::FileExt;

        // The call is fully qualified because this trait's method has the
        // same name as the one on `FileExt`.
        match FileExt::read_exact_at(self, buf, offset) {
            Ok(()) => Ok(()),
            Err(io_err) => Err(Error::io(io_err)),
        }
    }
}
380
/// On Windows, `FileExt::seek_read` is called in a loop until `buf` has been
/// filled completely.
#[cfg(all(feature = "std", windows))]
impl Read for std::fs::File {
    fn read_exact_at(
        &self,
        mut buf: &mut [u8],
        mut offset: u64,
    ) -> Result<(), Error> {
        use std::{io, os::windows::fs::FileExt};

        while !buf.is_empty() {
            let nread = match self.seek_read(buf, offset) {
                Ok(nread) => nread,
                // An interrupted read is simply retried.
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
                    continue;
                }
                Err(e) => return Err(Error::io(e)),
            };
            // A zero-length read while `buf` still has room means the data
            // ended before the buffer could be filled.
            if nread == 0 {
                return Err(Error::io(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                )));
            }
            buf = &mut buf[nread..];
            let advanced = u64::try_from(nread)
                .ok()
                .and_then(|nread| nread.checked_add(offset));
            let Some(next_offset) = advanced else {
                return Err(err!("offset overflow when reading from `File`"));
            };
            offset = next_offset;
        }
        Ok(())
    }
}
416
/// On platforms that are neither Unix nor Windows, a read is a seek to
/// `offset` followed by `read_exact`.
#[cfg(all(feature = "std", all(not(unix), not(windows))))]
impl Read for std::fs::File {
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
        use std::io::{Read as _, Seek as _, SeekFrom};

        // `Seek` and `Read` are implemented for `&File`, so a mutable binding
        // of the shared reference is all that is needed.
        let mut handle: &std::fs::File = self;
        let sought = handle.seek(SeekFrom::Start(offset)).map_err(Error::io);
        sought.with_context(|| {
            err!("failed to seek to offset {offset} in `File`")
        })?;
        handle.read_exact(buf).map_err(Error::io)
    }
}
428
429/// Allocates `additional` extra bytes on the `Vec` given and set them to `0`.
430///
431/// This specifically will never do an "OOM panic" and will instead return an
432/// error (courtesy of `Vec::try_reserve_exact`). It will also return an error
433/// without even trying the allocation if it's deemed to be "too big."
434///
435/// This is used so that we are extra careful about creating allocations based
436/// on integers parsed from concatenated TZif data. Generally speaking, the
437/// data we parse should be "trusted" (since it's probably not writable by
438/// anyone other than `root`), but who knows where this code will ultimately be
439/// used. So we try pretty hard to avoid panicking (even for OOM).
440///
441/// To be clear, we probably could panic on the error path. The goal here
442/// isn't to avoid OOM because you can't allocate 10 bytes---Jiff isn't robust
443/// enough in that kind of environment by far. The goal is to avoid OOM for
444/// exorbitantly large allocations through some kind of attack vector.
445fn alloc(bytes: &mut Vec<u8>, additional: usize) -> Result<(), Error> {
446 // At time of writing, the biggest TZif data file is a few KB. And the
447 // index block is tens of KB. So impose a limit that is a couple of orders
448 // of magnitude bigger, but still overall pretty small for... some systems.
449 // Anyway, I welcome improvements to this heuristic!
450 const LIMIT: usize = 10 * 1 << 20;
451
452 if additional > LIMIT {
453 return Err(err!(
454 "attempted to allocate more than {LIMIT} bytes \
455 while reading concatenated TZif data, which \
456 exceeds a heuristic limit to prevent huge allocations \
457 (please file a bug if this error is inappropriate)",
458 ));
459 }
460 bytes.try_reserve_exact(additional).map_err(|_| {
461 err!(
462 "failed to allocation {additional} bytes \
463 for reading concatenated TZif data"
464 )
465 })?;
466 // This... can't actually happen right?
467 let new_len = bytes
468 .len()
469 .checked_add(additional)
470 .ok_or_else(|| err!("total allocation length overflowed `usize`"))?;
471 bytes.resize(new_len, 0);
472 Ok(())
473}
474
475#[cfg(test)]
476mod tests {
477 use crate::{
478 civil::date,
479 tz::{
480 offset, testdata::ANDROID_CONCATENATED_TZIF, AmbiguousOffset,
481 Offset,
482 },
483 Timestamp,
484 };
485
486 use super::*;
487
488 fn unambiguous(offset_hours: i8) -> AmbiguousOffset {
489 let offset = offset(offset_hours);
490 o_unambiguous(offset)
491 }
492
493 fn gap(
494 earlier_offset_hours: i8,
495 later_offset_hours: i8,
496 ) -> AmbiguousOffset {
497 let earlier = offset(earlier_offset_hours);
498 let later = offset(later_offset_hours);
499 o_gap(earlier, later)
500 }
501
502 fn fold(
503 earlier_offset_hours: i8,
504 later_offset_hours: i8,
505 ) -> AmbiguousOffset {
506 let earlier = offset(earlier_offset_hours);
507 let later = offset(later_offset_hours);
508 o_fold(earlier, later)
509 }
510
511 fn o_unambiguous(offset: Offset) -> AmbiguousOffset {
512 AmbiguousOffset::Unambiguous { offset }
513 }
514
515 fn o_gap(earlier: Offset, later: Offset) -> AmbiguousOffset {
516 AmbiguousOffset::Gap { before: earlier, after: later }
517 }
518
519 fn o_fold(earlier: Offset, later: Offset) -> AmbiguousOffset {
520 AmbiguousOffset::Fold { before: earlier, after: later }
521 }
522
// Copied from src/tz/mod.rs.
//
// Each time zone is loaded from the concatenated TZif test data, and each
// civil datetime in its table must map to the expected (possibly ambiguous)
// offset.
#[test]
fn time_zone_tzif_to_ambiguous_timestamp() {
    let tests: &[(&str, &[_])] = &[
        (
            "America/New_York",
            &[
                ((1969, 12, 31, 19, 0, 0, 0), unambiguous(-5)),
                ((2024, 3, 10, 1, 59, 59, 999_999_999), unambiguous(-5)),
                ((2024, 3, 10, 2, 0, 0, 0), gap(-5, -4)),
                ((2024, 3, 10, 2, 59, 59, 999_999_999), gap(-5, -4)),
                ((2024, 3, 10, 3, 0, 0, 0), unambiguous(-4)),
                ((2024, 11, 3, 0, 59, 59, 999_999_999), unambiguous(-4)),
                ((2024, 11, 3, 1, 0, 0, 0), fold(-4, -5)),
                ((2024, 11, 3, 1, 59, 59, 999_999_999), fold(-4, -5)),
                ((2024, 11, 3, 2, 0, 0, 0), unambiguous(-5)),
            ],
        ),
        (
            "Europe/Dublin",
            &[
                ((1970, 1, 1, 0, 0, 0, 0), unambiguous(1)),
                ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
                ((2024, 3, 31, 1, 0, 0, 0), gap(0, 1)),
                ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 1)),
                ((2024, 3, 31, 2, 0, 0, 0), unambiguous(1)),
                ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(1)),
                ((2024, 10, 27, 1, 0, 0, 0), fold(1, 0)),
                ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(1, 0)),
                ((2024, 10, 27, 2, 0, 0, 0), unambiguous(0)),
            ],
        ),
        (
            "Australia/Tasmania",
            &[
                ((1970, 1, 1, 11, 0, 0, 0), unambiguous(11)),
                ((2024, 4, 7, 1, 59, 59, 999_999_999), unambiguous(11)),
                ((2024, 4, 7, 2, 0, 0, 0), fold(11, 10)),
                ((2024, 4, 7, 2, 59, 59, 999_999_999), fold(11, 10)),
                ((2024, 4, 7, 3, 0, 0, 0), unambiguous(10)),
                ((2024, 10, 6, 1, 59, 59, 999_999_999), unambiguous(10)),
                ((2024, 10, 6, 2, 0, 0, 0), gap(10, 11)),
                ((2024, 10, 6, 2, 59, 59, 999_999_999), gap(10, 11)),
                ((2024, 10, 6, 3, 0, 0, 0), unambiguous(11)),
            ],
        ),
        (
            "Antarctica/Troll",
            &[
                ((1970, 1, 1, 0, 0, 0, 0), unambiguous(0)),
                // test the gap
                ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)),
                ((2024, 3, 31, 1, 0, 0, 0), gap(0, 2)),
                ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 2)),
                // still in the gap!
                ((2024, 3, 31, 2, 0, 0, 0), gap(0, 2)),
                ((2024, 3, 31, 2, 59, 59, 999_999_999), gap(0, 2)),
                // finally out
                ((2024, 3, 31, 3, 0, 0, 0), unambiguous(2)),
                // test the fold
                ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(2)),
                ((2024, 10, 27, 1, 0, 0, 0), fold(2, 0)),
                ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(2, 0)),
                // still in the fold!
                ((2024, 10, 27, 2, 0, 0, 0), fold(2, 0)),
                ((2024, 10, 27, 2, 59, 59, 999_999_999), fold(2, 0)),
                // finally out
                ((2024, 10, 27, 3, 0, 0, 0), unambiguous(0)),
            ],
        ),
        (
            "America/St_Johns",
            &[
                (
                    (1969, 12, 31, 20, 30, 0, 0),
                    o_unambiguous(-Offset::hms(3, 30, 0)),
                ),
                (
                    (2024, 3, 10, 1, 59, 59, 999_999_999),
                    o_unambiguous(-Offset::hms(3, 30, 0)),
                ),
                (
                    (2024, 3, 10, 2, 0, 0, 0),
                    o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
                ),
                (
                    (2024, 3, 10, 2, 59, 59, 999_999_999),
                    o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)),
                ),
                (
                    (2024, 3, 10, 3, 0, 0, 0),
                    o_unambiguous(-Offset::hms(2, 30, 0)),
                ),
                (
                    (2024, 11, 3, 0, 59, 59, 999_999_999),
                    o_unambiguous(-Offset::hms(2, 30, 0)),
                ),
                (
                    (2024, 11, 3, 1, 0, 0, 0),
                    o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
                ),
                (
                    (2024, 11, 3, 1, 59, 59, 999_999_999),
                    o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)),
                ),
                (
                    (2024, 11, 3, 2, 0, 0, 0),
                    o_unambiguous(-Offset::hms(3, 30, 0)),
                ),
            ],
        ),
        // This time zone has an interesting transition where it jumps
        // backwards a full day at 1867-10-19T15:30:00.
        (
            "America/Sitka",
            &[
                ((1969, 12, 31, 16, 0, 0, 0), unambiguous(-8)),
                (
                    (-9999, 1, 2, 16, 58, 46, 0),
                    o_unambiguous(Offset::hms(14, 58, 47)),
                ),
                (
                    (1867, 10, 18, 15, 29, 59, 0),
                    o_unambiguous(Offset::hms(14, 58, 47)),
                ),
                (
                    (1867, 10, 18, 15, 30, 0, 0),
                    // A fold of 24 hours!!!
                    o_fold(
                        Offset::hms(14, 58, 47),
                        -Offset::hms(9, 1, 13),
                    ),
                ),
                (
                    (1867, 10, 19, 15, 29, 59, 999_999_999),
                    // Still in the fold...
                    o_fold(
                        Offset::hms(14, 58, 47),
                        -Offset::hms(9, 1, 13),
                    ),
                ),
                (
                    (1867, 10, 19, 15, 30, 0, 0),
                    // Finally out.
                    o_unambiguous(-Offset::hms(9, 1, 13)),
                ),
            ],
        ),
        // As with to_datetime, we test every possible transition
        // point here since this time zone has a small number of them.
        (
            "Pacific/Honolulu",
            &[
                (
                    (1896, 1, 13, 11, 59, 59, 0),
                    o_unambiguous(-Offset::hms(10, 31, 26)),
                ),
                (
                    (1896, 1, 13, 12, 0, 0, 0),
                    o_gap(
                        -Offset::hms(10, 31, 26),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1896, 1, 13, 12, 1, 25, 0),
                    o_gap(
                        -Offset::hms(10, 31, 26),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1896, 1, 13, 12, 1, 26, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1933, 4, 30, 1, 59, 59, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1933, 4, 30, 2, 0, 0, 0),
                    o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                ),
                (
                    (1933, 4, 30, 2, 59, 59, 0),
                    o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                ),
                (
                    (1933, 4, 30, 3, 0, 0, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1933, 5, 21, 10, 59, 59, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1933, 5, 21, 11, 0, 0, 0),
                    o_fold(
                        -Offset::hms(9, 30, 0),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1933, 5, 21, 11, 59, 59, 0),
                    o_fold(
                        -Offset::hms(9, 30, 0),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1933, 5, 21, 12, 0, 0, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1942, 2, 9, 1, 59, 59, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1942, 2, 9, 2, 0, 0, 0),
                    o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                ),
                (
                    (1942, 2, 9, 2, 59, 59, 0),
                    o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)),
                ),
                (
                    (1942, 2, 9, 3, 0, 0, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1945, 8, 14, 13, 29, 59, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1945, 8, 14, 13, 30, 0, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1945, 8, 14, 13, 30, 1, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1945, 9, 30, 0, 59, 59, 0),
                    o_unambiguous(-Offset::hms(9, 30, 0)),
                ),
                (
                    (1945, 9, 30, 1, 0, 0, 0),
                    o_fold(
                        -Offset::hms(9, 30, 0),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1945, 9, 30, 1, 59, 59, 0),
                    o_fold(
                        -Offset::hms(9, 30, 0),
                        -Offset::hms(10, 30, 0),
                    ),
                ),
                (
                    (1945, 9, 30, 2, 0, 0, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1947, 6, 8, 1, 59, 59, 0),
                    o_unambiguous(-Offset::hms(10, 30, 0)),
                ),
                (
                    (1947, 6, 8, 2, 0, 0, 0),
                    o_gap(-Offset::hms(10, 30, 0), -offset(10)),
                ),
                (
                    (1947, 6, 8, 2, 29, 59, 0),
                    o_gap(-Offset::hms(10, 30, 0), -offset(10)),
                ),
                ((1947, 6, 8, 2, 30, 0, 0), unambiguous(-10)),
            ],
        ),
    ];
    let tzdb = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
    // Scratch buffers reused across lookups: one for the index block and one
    // for the TZif data.
    let mut index_buf = alloc::vec![];
    let mut tzif_buf = alloc::vec![];
    for &(tzname, cases) in tests {
        let tz =
            tzdb.get(tzname, &mut index_buf, &mut tzif_buf).unwrap().unwrap();
        for &((year, month, day, hour, min, sec, nano), expected) in cases {
            let civil = date(year, month, day).at(hour, min, sec, nano);
            assert_eq!(
                tz.to_ambiguous_zoned(civil).offset(),
                expected,
                "\nTZ: {tzname}\ndatetime: \
                 {year:04}-{month:02}-{day:02}T\
                 {hour:02}:{min:02}:{sec:02}.{nano:09}",
            );
        }
    }
}
820
// Copied from src/tz/mod.rs.
//
// Each time zone is loaded from the concatenated TZif test data, and each
// timestamp in its table must map to the expected offset, abbreviation and
// civil datetime.
#[test]
fn time_zone_tzif_to_datetime() {
    let o = |hours| offset(hours);
    let tests: &[(&str, &[_])] = &[
        (
            "America/New_York",
            &[
                ((0, 0), o(-5), "EST", (1969, 12, 31, 19, 0, 0, 0)),
                (
                    (1710052200, 0),
                    o(-5),
                    "EST",
                    (2024, 3, 10, 1, 30, 0, 0),
                ),
                (
                    (1710053999, 999_999_999),
                    o(-5),
                    "EST",
                    (2024, 3, 10, 1, 59, 59, 999_999_999),
                ),
                ((1710054000, 0), o(-4), "EDT", (2024, 3, 10, 3, 0, 0, 0)),
                (
                    (1710055800, 0),
                    o(-4),
                    "EDT",
                    (2024, 3, 10, 3, 30, 0, 0),
                ),
                ((1730610000, 0), o(-4), "EDT", (2024, 11, 3, 1, 0, 0, 0)),
                (
                    (1730611800, 0),
                    o(-4),
                    "EDT",
                    (2024, 11, 3, 1, 30, 0, 0),
                ),
                (
                    (1730613599, 999_999_999),
                    o(-4),
                    "EDT",
                    (2024, 11, 3, 1, 59, 59, 999_999_999),
                ),
                ((1730613600, 0), o(-5), "EST", (2024, 11, 3, 1, 0, 0, 0)),
                (
                    (1730615400, 0),
                    o(-5),
                    "EST",
                    (2024, 11, 3, 1, 30, 0, 0),
                ),
            ],
        ),
        (
            "Australia/Tasmania",
            &[
                ((0, 0), o(11), "AEDT", (1970, 1, 1, 11, 0, 0, 0)),
                (
                    (1728142200, 0),
                    o(10),
                    "AEST",
                    (2024, 10, 6, 1, 30, 0, 0),
                ),
                (
                    (1728143999, 999_999_999),
                    o(10),
                    "AEST",
                    (2024, 10, 6, 1, 59, 59, 999_999_999),
                ),
                (
                    (1728144000, 0),
                    o(11),
                    "AEDT",
                    (2024, 10, 6, 3, 0, 0, 0),
                ),
                (
                    (1728145800, 0),
                    o(11),
                    "AEDT",
                    (2024, 10, 6, 3, 30, 0, 0),
                ),
                ((1712415600, 0), o(11), "AEDT", (2024, 4, 7, 2, 0, 0, 0)),
                (
                    (1712417400, 0),
                    o(11),
                    "AEDT",
                    (2024, 4, 7, 2, 30, 0, 0),
                ),
                (
                    (1712419199, 999_999_999),
                    o(11),
                    "AEDT",
                    (2024, 4, 7, 2, 59, 59, 999_999_999),
                ),
                ((1712419200, 0), o(10), "AEST", (2024, 4, 7, 2, 0, 0, 0)),
                (
                    (1712421000, 0),
                    o(10),
                    "AEST",
                    (2024, 4, 7, 2, 30, 0, 0),
                ),
            ],
        ),
        // Pacific/Honolulu is small enough that we just test every
        // possible instant before, at and after each transition.
        (
            "Pacific/Honolulu",
            &[
                (
                    (-2334101315, 0),
                    -Offset::hms(10, 31, 26),
                    "LMT",
                    (1896, 1, 13, 11, 59, 59, 0),
                ),
                (
                    (-2334101314, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1896, 1, 13, 12, 1, 26, 0),
                ),
                (
                    (-2334101313, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1896, 1, 13, 12, 1, 27, 0),
                ),
                (
                    (-1157283001, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1933, 4, 30, 1, 59, 59, 0),
                ),
                (
                    (-1157283000, 0),
                    -Offset::hms(9, 30, 0),
                    "HDT",
                    (1933, 4, 30, 3, 0, 0, 0),
                ),
                (
                    (-1157282999, 0),
                    -Offset::hms(9, 30, 0),
                    "HDT",
                    (1933, 4, 30, 3, 0, 1, 0),
                ),
                (
                    (-1155436201, 0),
                    -Offset::hms(9, 30, 0),
                    "HDT",
                    (1933, 5, 21, 11, 59, 59, 0),
                ),
                (
                    (-1155436200, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1933, 5, 21, 11, 0, 0, 0),
                ),
                (
                    (-1155436199, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1933, 5, 21, 11, 0, 1, 0),
                ),
                (
                    (-880198201, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1942, 2, 9, 1, 59, 59, 0),
                ),
                (
                    (-880198200, 0),
                    -Offset::hms(9, 30, 0),
                    "HWT",
                    (1942, 2, 9, 3, 0, 0, 0),
                ),
                (
                    (-880198199, 0),
                    -Offset::hms(9, 30, 0),
                    "HWT",
                    (1942, 2, 9, 3, 0, 1, 0),
                ),
                (
                    (-769395601, 0),
                    -Offset::hms(9, 30, 0),
                    "HWT",
                    (1945, 8, 14, 13, 29, 59, 0),
                ),
                (
                    (-769395600, 0),
                    -Offset::hms(9, 30, 0),
                    "HPT",
                    (1945, 8, 14, 13, 30, 0, 0),
                ),
                (
                    (-769395599, 0),
                    -Offset::hms(9, 30, 0),
                    "HPT",
                    (1945, 8, 14, 13, 30, 1, 0),
                ),
                (
                    (-765376201, 0),
                    -Offset::hms(9, 30, 0),
                    "HPT",
                    (1945, 9, 30, 1, 59, 59, 0),
                ),
                (
                    (-765376200, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1945, 9, 30, 1, 0, 0, 0),
                ),
                (
                    (-765376199, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1945, 9, 30, 1, 0, 1, 0),
                ),
                (
                    (-712150201, 0),
                    -Offset::hms(10, 30, 0),
                    "HST",
                    (1947, 6, 8, 1, 59, 59, 0),
                ),
                // At this point, we hit the last transition and the POSIX
                // TZ string takes over.
                (
                    (-712150200, 0),
                    -Offset::hms(10, 0, 0),
                    "HST",
                    (1947, 6, 8, 2, 30, 0, 0),
                ),
                (
                    (-712150199, 0),
                    -Offset::hms(10, 0, 0),
                    "HST",
                    (1947, 6, 8, 2, 30, 1, 0),
                ),
            ],
        ),
        // This time zone has an interesting transition where it jumps
        // backwards a full day at 1867-10-19T15:30:00.
        (
            "America/Sitka",
            &[
                ((0, 0), o(-8), "PST", (1969, 12, 31, 16, 0, 0, 0)),
                (
                    (-377705023201, 0),
                    Offset::hms(14, 58, 47),
                    "LMT",
                    (-9999, 1, 2, 16, 58, 46, 0),
                ),
                (
                    (-3225223728, 0),
                    Offset::hms(14, 58, 47),
                    "LMT",
                    (1867, 10, 19, 15, 29, 59, 0),
                ),
                // Notice the 24 hour time jump backwards a whole day!
                (
                    (-3225223727, 0),
                    -Offset::hms(9, 1, 13),
                    "LMT",
                    (1867, 10, 18, 15, 30, 0, 0),
                ),
                (
                    (-3225223726, 0),
                    -Offset::hms(9, 1, 13),
                    "LMT",
                    (1867, 10, 18, 15, 30, 1, 0),
                ),
            ],
        ),
    ];
    let tzdb = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
    // Scratch buffers reused across lookups: one for the index block and one
    // for the TZif data.
    let mut index_buf = alloc::vec![];
    let mut tzif_buf = alloc::vec![];
    for &(tzname, cases) in tests {
        let tz =
            tzdb.get(tzname, &mut index_buf, &mut tzif_buf).unwrap().unwrap();
        for &((unix_sec, unix_nano), expected_offset, expected_abbrev, civil) in
            cases
        {
            let (year, month, day, hour, min, sec, nano) = civil;
            let timestamp = Timestamp::new(unix_sec, unix_nano).unwrap();
            let info = tz.to_offset_info(timestamp);
            assert_eq!(
                info.offset(),
                expected_offset,
                "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
            );
            assert_eq!(
                info.abbreviation(),
                expected_abbrev,
                "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
            );
            assert_eq!(
                info.offset().to_datetime(timestamp),
                date(year, month, day).at(hour, min, sec, nano),
                "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})",
            );
        }
    }
}
1118
// Every identifier reported by `available` must be retrievable via `get`,
// and the resulting time zone must report that same identifier as its IANA
// name. This test is compiled out under Miri.
#[test]
#[cfg(not(miri))]
fn read_all_time_zones() {
    let tzdb = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
    let names = tzdb.available(&mut alloc::vec![]).unwrap();
    let mut index_buf = alloc::vec![];
    let mut tzif_buf = alloc::vec![];
    for tzname in &names {
        let tz =
            tzdb.get(tzname, &mut index_buf, &mut tzif_buf).unwrap().unwrap();
        assert_eq!(tzname, tz.iana_name().unwrap());
    }
}
1130
// The test data must list exactly 596 identifiers, in strictly ascending
// order.
#[test]
fn available_len() {
    let tzdb = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap();
    let names = tzdb.available(&mut alloc::vec![]).unwrap();
    assert_eq!(596, names.len());
    for pair in names.windows(2) {
        let x1 = &pair[0];
        let x2 = &pair[1];
        assert!(x1 < x2, "{x1} is not less than {x2}");
    }
}
1141}