Source code for satpy.aux_download

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2021 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy.  If not, see <http://www.gnu.org/licenses/>.
"""Functions and utilities for downloading ancillary data."""

import logging
import os

import pooch

import satpy

logger = logging.getLogger(__name__)

_FILE_REGISTRY = {}
_FILE_URLS = {}
RUNNING_TESTS = False


[docs] def register_file(url, filename, component_type=None, known_hash=None): """Register file for future retrieval. This function only prepares Satpy to be able to download and cache the provided file. It will not download the file. See :func:`satpy.aux_download.retrieve` for more information. Args: url (str): URL where remote file can be downloaded. filename (str): Filename used to identify and store the downloaded file as. component_type (str or None): Name of the type of Satpy component that will use this file. Typically "readers", "composites", "writers", or "enhancements" for consistency. This will be prepended to the filename when storing the data in the cache. known_hash (str): Hash used to verify the file is downloaded correctly. See https://www.fatiando.org/pooch/v1.3.0/beginner.html#hashes for more information. If not provided then the file is not checked. Returns: Cache key that can be used to retrieve the file later. The cache key consists of the ``component_type`` and provided ``filename``. This should be passed to :func:`satpy.aux_download_retrieve` when the file will be used. """ fname = _generate_filename(filename, component_type) _FILE_REGISTRY[fname] = known_hash _FILE_URLS[fname] = url return fname
[docs] def _generate_filename(filename, component_type): if filename is None: return None path = filename if component_type: path = "/".join([component_type, path]) return path
[docs] def _retrieve_offline(data_dir, cache_key): logger.debug("Downloading auxiliary files is turned off, will check " "local files.") local_file = os.path.join(data_dir, *cache_key.split("/")) if not os.path.isfile(local_file): raise RuntimeError("Satpy 'download_aux' setting is False meaning " "no new files will be downloaded and the local " "file '{}' does not exist.".format(local_file)) return local_file
[docs] def _should_download(cache_key): """Check if we're running tests and can download this file.""" return not RUNNING_TESTS or "README" in cache_key
[docs] def retrieve(cache_key, pooch_kwargs=None): """Download and cache the file associated with the provided ``cache_key``. Cache location is controlled by the config ``data_dir`` key. See :ref:`data_dir_setting` for more information. Args: cache_key (str): Cache key returned by :func:`~satpy.aux_download.register_file`. pooch_kwargs (dict or None): Extra keyword arguments to pass to :meth:`pooch.Pooch.fetch`. Returns: Local path of the cached file. """ pooch_kwargs = pooch_kwargs or {} path = satpy.config.get("data_dir") if not satpy.config.get("download_aux"): return _retrieve_offline(path, cache_key) if not _should_download(cache_key): raise RuntimeError("Auxiliary data download is not allowed during " "tests. Mock the appropriate components of your " "tests to not need the 'retrieve' function.") # reuse data directory as the default URL where files can be downloaded from pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, urls=_FILE_URLS) return pooch_obj.fetch(cache_key, **pooch_kwargs)
[docs] def _retrieve_all_with_pooch(pooch_kwargs): if pooch_kwargs is None: pooch_kwargs = {} path = satpy.config.get("data_dir") pooch_obj = pooch.create(path, path, registry=_FILE_REGISTRY, urls=_FILE_URLS) for fname in _FILE_REGISTRY: logger.info("Downloading extra data file '%s'...", fname) pooch_obj.fetch(fname, **pooch_kwargs)
[docs] def retrieve_all(readers=None, writers=None, composite_sensors=None, pooch_kwargs=None): """Find cache-able data files for Satpy and download them. The typical use case for this function is to download all ancillary files before going to an environment/system that does not have internet access. Args: readers (list or None): Limit searching to these readers. If not specified or ``None`` then all readers are searched. If an empty list then no readers are searched. writers (list or None): Limit searching to these writers. If not specified or ``None`` then all writers are searched. If an empty list then no writers are searched. composite_sensors (list or None): Limit searching to composite configuration files for these sensors. If ``None`` then all sensor configs will be searched. If an empty list then no composites will be searched. pooch_kwargs (dict): Additional keyword arguments to pass to pooch ``fetch``. """ if not satpy.config.get("download_aux"): raise RuntimeError("Satpy 'download_aux' setting is False so no files " "will be downloaded.") find_registerable_files(readers=readers, writers=writers, composite_sensors=composite_sensors) _retrieve_all_with_pooch(pooch_kwargs) logger.info("Done downloading all extra files.")
[docs] def find_registerable_files(readers=None, writers=None, composite_sensors=None): """Load all Satpy components so they can be downloaded. Args: readers (list or None): Limit searching to these readers. If not specified or ``None`` then all readers are searched. If an empty list then no readers are searched. writers (list or None): Limit searching to these writers. If not specified or ``None`` then all writers are searched. If an empty list then no writers are searched. composite_sensors (list or None): Limit searching to composite configuration files for these sensors. If ``None`` then all sensor configs will be searched. If an empty list then no composites will be searched. """ _find_registerable_files_compositors(composite_sensors) _find_registerable_files_readers(readers) _find_registerable_files_writers(writers) return sorted(_FILE_REGISTRY.keys())
[docs] def _find_registerable_files_compositors(sensors=None): """Load all compositor configs so that files are registered. Compositor objects should register files when they are initialized. """ from satpy.composites.config_loader import all_composite_sensors, load_compositor_configs_for_sensors if sensors is None: sensors = all_composite_sensors() if sensors: mods = load_compositor_configs_for_sensors(sensors)[1] _register_modifier_files(mods)
[docs] def _register_modifier_files(modifiers): for mod_sensor_dict in modifiers.values(): for mod_name, (mod_cls, mod_props) in mod_sensor_dict.items(): try: mod_cls(**mod_props) except (ValueError, RuntimeError): logger.error("Could not initialize modifier '%s' for " "auxiliary download registration.", mod_name)
[docs] def _find_registerable_files_readers(readers=None): """Load all readers so that files are registered.""" import yaml from satpy.readers import configs_for_reader, load_reader for reader_configs in configs_for_reader(reader=readers): try: load_reader(reader_configs) except (ModuleNotFoundError, yaml.YAMLError): continue
[docs] def _find_registerable_files_writers(writers=None): """Load all writers so that files are registered.""" from satpy.writers import configs_for_writer, load_writer_configs for writer_configs in configs_for_writer(writer=writers): try: load_writer_configs(writer_configs) except ValueError: continue
[docs] class DataDownloadMixin: """Mixin class for Satpy components to download files. This class simplifies the logic needed to download and cache data files needed for operations in a Satpy component (readers, writers, etc). It does this in a two step process where files that might be downloaded are "registered" and then "retrieved" when they need to be used. To use this class include it as one of the subclasses of your Satpy component. Then in the ``__init__`` method, call the ``register_data_files`` function during initialization. .. note:: This class is already included in the ``FileYAMLReader`` and ``Writer`` base classes. There is no need to define a custom class. The below code is shown as an example:: from satpy.readers.yaml_reader import AbstractYAMLReader from satpy.aux_download import DataDownloadMixin class MyReader(AbstractYAMLReader, DataDownloadMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.register_data_files() This class expects data files to be configured in either a ``self.info['data_files']`` (standard for readers/writers) or ``self.config['data_files']`` list. The ``data_files`` item itself is a list of dictionaries. This information can also be passed directly to ``register_data_files`` for more complex cases. In YAML, for a reader, this might look like this:: reader: name: abi_l1b short_name: ABI L1b long_name: GOES-R ABI Level 1b ... other metadata ... data_files: - url: "https://example.com/my_data_file.dat" - url: "https://raw.githubusercontent.com/pytroll/satpy/main/README.rst" known_hash: "sha256:5891286b63e7745de08c4b0ac204ad44cfdb9ab770309debaba90308305fa759" - url: "https://raw.githubusercontent.com/pytroll/satpy/main/RELEASING.md" filename: "satpy_releasing.md" In this example we register two files that might be downloaded. If ``known_hash`` is not provided or None (null in YAML) then the data file will not be checked for validity when downloaded. See :func:`~satpy.aux_download.register_file` for more information. You can optionally specify ``filename`` to define the in-cache name when this file is downloaded. This can be useful in cases when the filename can not be easily determined from the URL. When it comes time to needing the file, you can retrieve the local path by calling ``~satpy.aux_download.retrieve(cache_key)`` with the "cache key" generated during registration. These keys will be in the format: ``<component_type>/<filename>``. For a reader this would be ``readers/satpy_release.md``. This Mixin is not the only way to register and download files for a Satpy component, but is the most generic and flexible. Feel free to use the :func:`~satpy.aux_download.register_file` and :func:`~satpy.aux_download.retrieve` functions directly. However, :meth:`~satpy.aux_download.find_registerable_files` must also be updated to support your component (if files are not register during initialization). """ DATA_FILE_COMPONENTS = { "reader": "readers", "writer": "writers", "composit": "composites", "modifi": "modifiers", "corr": "modifiers", } @property def _data_file_component_type(self): cls_name = self.__class__.__name__.lower() for cls_name_sub, comp_type in self.DATA_FILE_COMPONENTS.items(): if cls_name_sub in cls_name: return comp_type return "other"
[docs] def register_data_files(self, data_files=None): """Register a series of files that may be downloaded later. See :class:`~satpy.aux_download.DataDownloadMixin` for more information on the assumptions and structure of the data file configuration dictionary. """ comp_type = self._data_file_component_type if data_files is None: df_parent = getattr(self, "info", self.config) data_files = df_parent.get("data_files", []) cache_keys = [] for data_file_entry in data_files: cache_key = self._register_data_file(data_file_entry, comp_type) cache_keys.append(cache_key) return cache_keys
[docs] @staticmethod def _register_data_file(data_file_entry, comp_type): url = data_file_entry["url"] filename = data_file_entry.get("filename", os.path.basename(url)) known_hash = data_file_entry.get("known_hash") return register_file(url, filename, component_type=comp_type, known_hash=known_hash)
[docs] def retrieve_all_cmd(argv=None): """Call 'retrieve_all' function from console script 'satpy_retrieve_all'.""" import argparse parser = argparse.ArgumentParser(description="Download auxiliary data files used by Satpy.") parser.add_argument("--data-dir", help="Override 'SATPY_DATA_DIR' for destination of " "downloaded files. This does NOT change the " "directory Satpy will look at when searching " "for files outside of this script.") parser.add_argument("--composite-sensors", nargs="*", help="Limit loaded composites for the specified " "sensors. If specified with no arguments, " "no composite files will be downloaded.") parser.add_argument("--readers", nargs="*", help="Limit searching to these readers. If specified " "with no arguments, no reader files will be " "downloaded.") parser.add_argument("--writers", nargs="*", help="Limit searching to these writers. If specified " "with no arguments, no writer files will be " "downloaded.") args = parser.parse_args(argv) logging.basicConfig(level=logging.INFO) if args.data_dir is None: args.data_dir = satpy.config.get("data_dir") with satpy.config.set(data_dir=args.data_dir): retrieve_all(readers=args.readers, writers=args.writers, composite_sensors=args.composite_sensors)