jiff/tz/db/concatenated/
enabled.rs

1use alloc::{
2    string::{String, ToString},
3    vec,
4    vec::Vec,
5};
6
7use std::{
8    ffi::OsString,
9    fs::File,
10    path::{Path, PathBuf},
11    sync::{Arc, RwLock},
12    time::Duration,
13};
14
15use crate::{
16    error::{err, Error},
17    timestamp::Timestamp,
18    tz::{
19        concatenated::ConcatenatedTzif, db::special_time_zone, TimeZone,
20        TimeZoneNameIter,
21    },
22    util::{self, array_str::ArrayStr, cache::Expiration, utf8},
23};
24
/// Default time-to-live (five minutes) shared by the cached time zones and
/// the cached time zone names in this module.
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);
26
/// The places to look for a concatenated `tzdata` file.
///
/// `Database::from_env` tries these in the order listed and uses the first
/// one that can be opened. Each entry names an environment variable that
/// supplies a path prefix, a fallback prefix used when that variable is not
/// set, and a suffix that is joined on to the prefix.
static TZDATA_LOCATIONS: &[TzdataLocation] = &[
    // With the default prefix: `/system/usr/share/zoneinfo/tzdata`.
    TzdataLocation::Env {
        name: "ANDROID_ROOT",
        default: "/system",
        suffix: "usr/share/zoneinfo/tzdata",
    },
    // With the default prefix: `/data/misc/zoneinfo/current/tzdata`.
    TzdataLocation::Env {
        name: "ANDROID_DATA",
        default: "/data/misc",
        suffix: "zoneinfo/current/tzdata",
    },
];
40
/// A time zone database backed by a single concatenated `tzdata` file.
///
/// A value created by `Database::none` has neither a path nor names, and
/// every lookup that would need the file fails.
pub(crate) struct Database {
    /// Path to the concatenated `tzdata` file, or `None` for the dummy
    /// database.
    path: Option<PathBuf>,
    /// The time zone names read from `path`, or `None` for the dummy
    /// database.
    names: Option<Names>,
    /// Cache of the time zones that have already been read from `path`.
    zones: RwLock<CachedZones>,
}
46
47impl Database {
48    pub(crate) fn from_env() -> Database {
49        let mut attempted = vec![];
50        for loc in TZDATA_LOCATIONS {
51            let path = loc.to_path_buf();
52            trace!(
53                "opening concatenated tzdata database at {}",
54                path.display()
55            );
56            match Database::from_path(&path) {
57                Ok(db) => return db,
58                Err(_err) => {
59                    trace!("failed opening {}: {_err}", path.display());
60                }
61            }
62            attempted.push(path.to_string_lossy().into_owned());
63        }
64        debug!(
65            "could not find concatenated tzdata database at any of the \
66             following paths: {}",
67            attempted.join(", "),
68        );
69        Database::none()
70    }
71
72    pub(crate) fn from_path(path: &Path) -> Result<Database, Error> {
73        let names = Some(Names::new(path)?);
74        let zones = RwLock::new(CachedZones::new());
75        Ok(Database { path: Some(path.to_path_buf()), names, zones })
76    }
77
78    /// Creates a "dummy" zoneinfo database in which all lookups fail.
79    pub(crate) fn none() -> Database {
80        let path = None;
81        let names = None;
82        let zones = RwLock::new(CachedZones::new());
83        Database { path, names, zones }
84    }
85
86    pub(crate) fn reset(&self) {
87        let mut zones = self.zones.write().unwrap();
88        if let Some(ref names) = self.names {
89            names.reset();
90        }
91        zones.reset();
92    }
93
94    pub(crate) fn get(&self, query: &str) -> Option<TimeZone> {
95        if let Some(tz) = special_time_zone(query) {
96            return Some(tz);
97        }
98        let path = self.path.as_ref()?;
99        // The fast path is when the query matches a pre-existing unexpired
100        // time zone.
101        {
102            let zones = self.zones.read().unwrap();
103            if let Some(czone) = zones.get(query) {
104                if !czone.is_expired() {
105                    trace!(
106                        "for time zone query `{query}`, \
107                         found cached zone `{}` \
108                         (expiration={}, last_modified={:?})",
109                        czone.tz.diagnostic_name(),
110                        czone.expiration,
111                        czone.last_modified,
112                    );
113                    return Some(czone.tz.clone());
114                }
115            }
116        }
117        // At this point, one of three possible cases is true:
118        //
119        // 1. The given query does not match any time zone in this database.
120        // 2. A time zone exists, but isn't cached.
121        // 3. A zime exists and is cached, but needs to be revalidated.
122        //
123        // While (3) is probably the common case since our TTLs are pretty
124        // short, both (2) and (3) require write access. Thus we rule out (1)
125        // before acquiring a write lock on the entire database. Plus, we'll
126        // need the zone info for case (2) and possibly for (3) if cache
127        // revalidation fails.
128        //
129        // I feel kind of bad about all this because it seems to me like there
130        // is too much work being done while holding on to the write lock.
131        // In particular, it seems like bad juju to do any I/O of any kind
132        // while holding any lock at all. I think I could design something
133        // that avoids doing I/O while holding a lock, but it seems a lot more
134        // complicated. (And what happens if the I/O becomes outdated by the
135        // time you acquire the lock?)
136        let mut zones = self.zones.write().unwrap();
137        let ttl = zones.ttl;
138        match zones.get_zone_index(query) {
139            Ok(i) => {
140                let czone = &mut zones.zones[i];
141                if czone.revalidate(path, ttl) {
142                    // Metadata on the file didn't change, so we assume the
143                    // file hasn't either.
144                    return Some(czone.tz.clone());
145                }
146                // Revalidation failed. Re-read the TZif data.
147                let (scratch1, scratch2) = zones.scratch();
148                let czone = match CachedTimeZone::new(
149                    path, query, ttl, scratch1, scratch2,
150                ) {
151                    Ok(Some(czone)) => czone,
152                    Ok(None) => return None,
153                    Err(_err) => {
154                        warn!(
155                            "failed to re-cache time zone {query} \
156                             from {path}: {_err}",
157                            path = path.display(),
158                        );
159                        return None;
160                    }
161                };
162                let tz = czone.tz.clone();
163                zones.zones[i] = czone;
164                Some(tz)
165            }
166            Err(i) => {
167                let (scratch1, scratch2) = zones.scratch();
168                let czone = match CachedTimeZone::new(
169                    path, query, ttl, scratch1, scratch2,
170                ) {
171                    Ok(Some(czone)) => czone,
172                    Ok(None) => return None,
173                    Err(_err) => {
174                        warn!(
175                            "failed to cache time zone {query} \
176                             from {path}: {_err}",
177                            path = path.display(),
178                        );
179                        return None;
180                    }
181                };
182                let tz = czone.tz.clone();
183                zones.zones.insert(i, czone);
184                Some(tz)
185            }
186        }
187    }
188
189    pub(crate) fn available<'d>(&'d self) -> TimeZoneNameIter<'d> {
190        let Some(path) = self.path.as_ref() else {
191            return TimeZoneNameIter::empty();
192        };
193        let Some(names) = self.names.as_ref() else {
194            return TimeZoneNameIter::empty();
195        };
196        TimeZoneNameIter::from_iter(names.available(path).into_iter())
197    }
198
199    pub(crate) fn is_definitively_empty(&self) -> bool {
200        self.names.is_none()
201    }
202}
203
204impl core::fmt::Debug for Database {
205    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
206        write!(f, "Concatenated(")?;
207        if let Some(ref path) = self.path {
208            write!(f, "{}", path.display())?;
209        } else {
210            write!(f, "unavailable")?;
211        }
212        write!(f, ")")
213    }
214}
215
/// The cache of time zones read from a concatenated `tzdata` file.
#[derive(Debug)]
struct CachedZones {
    /// The cached time zones, kept sorted by name (ignoring ASCII case) so
    /// that they can be found via binary search.
    zones: Vec<CachedTimeZone>,
    /// How long a cached time zone is considered fresh.
    ttl: Duration,
    /// Scratch space used to help amortize allocation when reading TZif data.
    scratch1: Vec<u8>,
    /// A second scratch buffer, used for the same purpose as `scratch1`.
    scratch2: Vec<u8>,
}
223
224impl CachedZones {
225    const DEFAULT_TTL: Duration = DEFAULT_TTL;
226
227    fn new() -> CachedZones {
228        CachedZones {
229            zones: vec![],
230            ttl: CachedZones::DEFAULT_TTL,
231            scratch1: vec![],
232            scratch2: vec![],
233        }
234    }
235
236    fn get(&self, query: &str) -> Option<&CachedTimeZone> {
237        self.get_zone_index(query).ok().map(|i| &self.zones[i])
238    }
239
240    fn get_zone_index(&self, query: &str) -> Result<usize, usize> {
241        self.zones.binary_search_by(|zone| {
242            utf8::cmp_ignore_ascii_case(zone.name(), query)
243        })
244    }
245
246    fn reset(&mut self) {
247        self.zones.clear();
248    }
249
250    fn scratch(&mut self) -> (&mut Vec<u8>, &mut Vec<u8>) {
251        (&mut self.scratch1, &mut self.scratch2)
252    }
253}
254
/// A time zone read from a concatenated `tzdata` file, together with the
/// data needed to decide whether it can still be trusted.
#[derive(Clone, Debug)]
struct CachedTimeZone {
    /// The time zone itself.
    tz: TimeZone,
    /// When this entry goes stale and needs to be revalidated.
    expiration: Expiration,
    /// The last modified time recorded for the `tzdata` file when this entry
    /// was created, if one was available.
    last_modified: Option<Timestamp>,
}
261
262impl CachedTimeZone {
263    /// Create a new cached time zone.
264    ///
265    /// `path` should be a concatenated `tzdata` file. `query` is the IANA time
266    /// zone identifier we're looing for. The `ttl` says how long
267    /// the cached time zone should minimally remain fresh for.
268    ///
269    /// The `scratch1` and `scratch2` given are used to help amortize
270    /// allocation when deserializing TZif data from the concatenated `tzdata`
271    /// file.
272    ///
273    /// If no such time zone exists and no other error occurred, then
274    /// `Ok(None)` is returned.
275    fn new(
276        path: &Path,
277        query: &str,
278        ttl: Duration,
279        scratch1: &mut Vec<u8>,
280        scratch2: &mut Vec<u8>,
281    ) -> Result<Option<CachedTimeZone>, Error> {
282        let file = File::open(path).map_err(|e| Error::io(e).path(path))?;
283        let db = ConcatenatedTzif::open(&file)?;
284        let Some(tz) = db.get(query, scratch1, scratch2)? else {
285            return Ok(None);
286        };
287        let last_modified = util::fs::last_modified_from_file(path, &file);
288        let expiration = Expiration::after(ttl);
289        Ok(Some(CachedTimeZone { tz, expiration, last_modified }))
290    }
291
292    /// Returns true if this time zone has gone stale and should, at minimum,
293    /// be revalidated.
294    fn is_expired(&self) -> bool {
295        self.expiration.is_expired()
296    }
297
298    /// Returns the IANA time zone identifier of this cached time zone.
299    fn name(&self) -> &str {
300        // OK because `ConcatenatedTzif` guarantees all `TimeZone` values it
301        // returns have an IANA name.
302        self.tz.iana_name().unwrap()
303    }
304
305    /// Attempts to revalidate this cached time zone.
306    ///
307    /// Upon successful revalidation (that is, the cached time zone is still
308    /// fresh and okay to use), this returns true. Otherwise, the cached time
309    /// zone should be considered stale and must be re-created.
310    ///
311    /// Note that technically another layer of revalidation could be done.
312    /// For example, we could keep a checksum of the TZif data, and only
313    /// consider rebuilding the time zone when the checksum changes. But I
314    /// think the last modified metadata will in practice be good enough, and
315    /// parsing TZif data should be quite fast.
316    ///
317    /// `path` should be a concatenated `tzdata` file.
318    fn revalidate(&mut self, path: &Path, ttl: Duration) -> bool {
319        // If we started with no last modified timestamp, then I guess we
320        // should always fail revalidation? I suppose a case could be made to
321        // do the opposite: always pass revalidation.
322        let Some(old_last_modified) = self.last_modified else {
323            trace!(
324                "revalidation for {name} in {path} failed because \
325                 old last modified time is unavailable",
326                name = self.name(),
327                path = path.display(),
328            );
329            return false;
330        };
331        let Some(new_last_modified) = util::fs::last_modified_from_path(path)
332        else {
333            trace!(
334                "revalidation for {name} in {path} failed because \
335                 new last modified time is unavailable",
336                name = self.name(),
337                path = path.display(),
338            );
339            return false;
340        };
341        // We consider any change to invalidate cache.
342        if old_last_modified != new_last_modified {
343            trace!(
344                "revalidation for {name} in {path} failed because \
345                 last modified times do not match: old = {old} != {new} = new",
346                name = self.name(),
347                path = path.display(),
348                old = old_last_modified,
349                new = new_last_modified,
350            );
351            return false;
352        }
353        trace!(
354            "revalidation for {name} in {path} succeeded because \
355             last modified times match: old = {old} == {new} = new",
356            name = self.name(),
357            path = path.display(),
358            old = old_last_modified,
359            new = new_last_modified,
360        );
361        self.expiration = Expiration::after(ttl);
362        true
363    }
364}
365
/// A collection of time zone names extracted from a concatenated tzdata file.
///
/// This type is responsible not just for providing the names, but also for
/// updating them periodically.
///
/// Every name _should_ correspond to an entry in the data block of the
/// corresponding `tzdata` file, but we generally don't take advantage of this.
/// The reason is that the file could theoretically change between when we
/// extract the names and when we do a TZif lookup later. This is all perfectly
/// manageable, but it should only be done if there's a benchmark demanding
/// more effort be spent here. As it stands, we do have a rudimentary caching
/// mechanism, so not all time zone lookups go through this slower path. (This
/// is also why `Names` has no lookup routine. There's just a routine to return
/// all names.)
#[derive(Debug)]
struct Names {
    /// The names and their bookkeeping, behind a lock so that they can be
    /// refreshed through a shared reference.
    inner: RwLock<NamesInner>,
}
384
/// The lock-protected state of `Names`.
#[derive(Debug)]
struct NamesInner {
    /// All available names from the `tzdata` file.
    names: Vec<Arc<str>>,
    /// The version string read from the `tzdata` file.
    version: ArrayStr<5>,
    /// Scratch space used to help amortize allocation when extracting names
    /// from a `tzdata` file.
    scratch: Vec<u8>,
    /// How long the cached names are considered fresh after being read. A
    /// new `expiration` is computed from this on every refresh.
    ///
    /// Note that this is a necessary but not sufficient criterion for
    /// invalidating the cached value.
    ttl: Duration,
    /// The time at which the data in `names` becomes stale.
    expiration: Expiration,
}
402
403impl Names {
404    /// See commnents in `tz/db/zoneinfo/enabled.rs` about this. We just copied
405    /// it from there.
406    const DEFAULT_TTL: Duration = DEFAULT_TTL;
407
408    /// Create a new collection of names from the concatenated `tzdata` file
409    /// path given.
410    ///
411    /// If no names of time zones could be found in the given directory, then
412    /// an error is returned.
413    fn new(path: &Path) -> Result<Names, Error> {
414        let path = path.to_path_buf();
415        let mut scratch = vec![];
416        let (names, version) = read_names_and_version(&path, &mut scratch)?;
417        trace!(
418            "found concatenated tzdata at {path} \
419             with version {version} and {len} \
420             IANA time zone identifiers",
421            path = path.display(),
422            len = names.len(),
423        );
424        let ttl = Names::DEFAULT_TTL;
425        let expiration = Expiration::after(ttl);
426        let inner = NamesInner { names, version, scratch, ttl, expiration };
427        Ok(Names { inner: RwLock::new(inner) })
428    }
429
430    /// Returns all available time zone names after attempting a refresh of
431    /// the underlying data if it's stale.
432    fn available(&self, path: &Path) -> Vec<String> {
433        let mut inner = self.inner.write().unwrap();
434        inner.attempt_refresh(path);
435        inner.available()
436    }
437
438    fn reset(&self) {
439        self.inner.write().unwrap().reset();
440    }
441}
442
443impl NamesInner {
444    /// Returns all available time zone names.
445    fn available(&self) -> Vec<String> {
446        self.names.iter().map(|name| name.to_string()).collect()
447    }
448
449    /// Attempts a refresh, but only follows through if the TTL has been
450    /// exceeded.
451    ///
452    /// The caller must ensure that the other cache invalidation criteria
453    /// have been upheld. For example, this should only be called for a missed
454    /// zone name lookup.
455    fn attempt_refresh(&mut self, path: &Path) {
456        if self.expiration.is_expired() {
457            self.refresh(path);
458        }
459    }
460
461    /// Forcefully refreshes the cached names with possibly new data from disk.
462    /// If an error occurs when fetching the names, then no names are updated
463    /// (but the `expires_at` is updated). This will also emit a warning log on
464    /// failure.
465    fn refresh(&mut self, path: &Path) {
466        // PERF: Should we try to move this tzdb handling to run outside of a
467        // lock? It probably happens pretty rarely, so it might not matter.
468        let result = read_names_and_version(path, &mut self.scratch);
469        self.expiration = Expiration::after(self.ttl);
470        match result {
471            Ok((names, version)) => {
472                trace!(
473                    "refreshed concatenated tzdata at {path} \
474                     with version {version} and {len} \
475                     IANA time zone identifiers",
476                    path = path.display(),
477                    len = names.len(),
478                );
479                self.names = names;
480                self.version = version;
481            }
482            Err(_err) => {
483                warn!(
484                    "failed to refresh concatenated time zone name cache \
485                     for {path}: {_err}",
486                    path = path.display(),
487                )
488            }
489        }
490    }
491
492    /// Resets the state such that the next lookup is guaranteed to force a
493    /// cache refresh, and that it is impossible for any data to be stale.
494    fn reset(&mut self) {
495        // This will force the next lookup to fail.
496        self.names.clear();
497        // And this will force the next failed lookup to result in a refresh.
498        self.expiration = Expiration::expired();
499    }
500}
501
/// Describes where a concatenated `tzdata` file might live.
///
/// Only an Android-centric, environment variable driven lookup exists today.
/// If we ever want to probe a fixed path like we do for `ZoneInfo`, then a
/// `Fixed` variant belongs here.
#[derive(Debug)]
enum TzdataLocation {
    /// A path made of a prefix taken from the environment variable `name`
    /// (or `default` when that variable is not set) followed by `suffix`.
    Env {
        name: &'static str,
        default: &'static str,
        suffix: &'static str,
    },
}

impl TzdataLocation {
    /// Resolves this location to a concrete path. For the `Env` variant this
    /// consults the process environment.
    fn to_path_buf(&self) -> PathBuf {
        match *self {
            TzdataLocation::Env { name, default, suffix } => {
                let root = match std::env::var_os(name) {
                    Some(value) => value,
                    None => OsString::from(default),
                };
                let mut full = PathBuf::from(root);
                full.push(suffix);
                full
            }
        }
    }
}
526
527/// Reads only the IANA time zone identifiers from the given path (and the
528/// version of the database).
529///
530/// The `scratch` given is used to help amortize allocation when deserializing
531/// names from the concatenated `tzdata` file.
532///
533/// This returns an error if reading was successful but no names were found.
534fn read_names_and_version(
535    path: &Path,
536    scratch: &mut Vec<u8>,
537) -> Result<(Vec<Arc<str>>, ArrayStr<5>), Error> {
538    let file = File::open(path).map_err(|e| Error::io(e).path(path))?;
539    let db = ConcatenatedTzif::open(file)?;
540    let names: Vec<Arc<str>> =
541        db.available(scratch)?.into_iter().map(Arc::from).collect();
542    if names.is_empty() {
543        return Err(err!(
544            "found no IANA time zone identifiers in \
545             concatenated tzdata file at {path}",
546            path = path.display(),
547        ));
548    }
549    Ok((names, db.version()))
550}
551
#[cfg(test)]
mod tests {
    use super::*;

    /// DEBUG COMMAND
    ///
    /// Takes environment variable `JIFF_DEBUG_CONCATENATED_TZDATA` as input
    /// (the path to a concatenated `tzdata` file) and prints a list of all
    /// time zone names in that file to stderr (one per line). When the
    /// variable is not set, this test does nothing.
    ///
    /// Callers may also set `RUST_LOG` to get extra debugging output.
    #[test]
    fn debug_tzdata_list() -> anyhow::Result<()> {
        let _ = crate::logging::Logger::init();

        const ENV: &str = "JIFF_DEBUG_CONCATENATED_TZDATA";
        let Some(val) = std::env::var_os(ENV) else { return Ok(()) };
        let path = PathBuf::from(val);
        let db = Database::from_path(&path)?;
        for name in db.available() {
            std::eprintln!("{name}");
        }
        Ok(())
    }
}