Source code for satpy.cf.encoding

# Copyright (c) 2017-2023 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy.  If not, see <http://www.gnu.org/licenses/>.
"""CF encoding."""
import logging

import numpy as np
import xarray as xr
from xarray.coding.times import CFDatetimeCoder

logger = logging.getLogger(__name__)


def _set_default_chunks(encoding, dataset):
    """Fill in ``chunksizes`` so that the current dask chunking is kept.

    A ``chunksizes`` entry that is already present for a variable is left
    untouched, i.e. user-defined chunks win over the defaults.

    Args:
        encoding: Mapping of variable name to encoding dict; modified in place.
        dataset: Object whose ``variables`` mapping yields variables exposing
            ``chunks``, ``data.chunksize`` and ``shape``.

    Returns:
        The same ``encoding`` dictionary.
    """
    for name, var in dataset.variables.items():
        if not var.chunks:
            # Variables without chunks get no default chunk sizes
            continue
        # A chunk size may not exceed the size of the dimension itself
        clipped = np.minimum(var.data.chunksize, var.shape)
        var_encoding = encoding.setdefault(name, {})
        var_encoding.setdefault("chunksizes", tuple(clipped))
    return encoding
def _set_default_fill_value(encoding, dataset):
    """Disable the fill value for coordinate variables.

    Prevents a ``_FillValue`` attribute from being added to coordinate
    variables (https://github.com/pydata/xarray/issues/1865).

    Args:
        encoding: Mapping of variable name to encoding dict; modified in place.
        dataset: Mapping whose values expose ``dims`` and ``coords``.

    Returns:
        The same ``encoding`` dictionary.
    """
    # Names that are both a dimension and a coordinate of some data array
    dimension_coords = [
        name
        for data_array in dataset.values()
        for name in set(data_array.dims).intersection(data_array.coords)
    ]
    for name in dimension_coords:
        coord_encoding = encoding.setdefault(name, {})
        coord_encoding["_FillValue"] = None
    return encoding
def _set_default_time_encoding(encoding, dataset):
    """Set the default encoding of the time coordinate and its bounds.

    Time coordinate and time bounds are given the same units and calendar.
    The defaults come from xarray's CF datetime encoding and can be
    overridden by a user-defined ``"time"`` entry in ``encoding``.

    Args:
        encoding: Mapping of variable name to encoding dict; modified in place.
        dataset: Container supporting ``"time" in dataset`` and
            ``dataset["time"].data``.

    Returns:
        The same ``encoding`` dictionary.
    """
    if "time" not in dataset:
        return encoding

    time_coord = dataset["time"]
    try:
        reference_time = time_coord.data[0]
    except IndexError:
        # Indexing failed; fall back to the data object itself
        reference_time = time_coord.data

    cf_default = CFDatetimeCoder().encode(xr.DataArray(reference_time))
    # User-defined settings take precedence over the CF defaults
    time_encoding = {
        "units": cf_default.attrs["units"],
        "calendar": cf_default.attrs["calendar"],
        **encoding.get("time", {}),
    }
    encoding["time"] = time_encoding
    # FUTURE: Not required anymore with xarray-0.14+
    encoding["time_bnds"] = {
        "units": time_encoding["units"],
        "calendar": time_encoding["calendar"],
        "_FillValue": None,
    }
    return encoding
def _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix):
    """Ensure variable names of the encoding dictionary account for numeric_name_prefix.

    A lot of channel names in satpy start with a digit. When preparing
    CF-compliant datasets, these channels are prefixed with
    ``numeric_name_prefix``. If the encoding dictionary still refers to such a
    variable by its unprefixed name, the entry is moved to the prefixed name.

    Args:
        encoding: Mapping of variable name to encoding dict; modified in place.
        dataset: Object whose ``variables`` attribute iterates over the
            variable names.
        numeric_name_prefix: Prefix to look for at the start of variable
            names. If empty or ``None``, nothing is renamed.

    Returns:
        The same ``encoding`` dictionary.
    """
    if not numeric_name_prefix:
        return encoding

    prefix_length = len(numeric_name_prefix)
    for var_name in list(dataset.variables):
        if not var_name.startswith(numeric_name_prefix):
            continue
        # Strip only the leading prefix: ``str.replace`` would also remove
        # later occurrences and produce a name that never matches.
        orig_var_name = var_name[prefix_length:]
        if orig_var_name in encoding:
            encoding[var_name] = encoding.pop(orig_var_name)
    return encoding
def update_encoding(dataset, to_engine_kwargs, numeric_name_prefix="CHANNEL_"):
    """Update encoding.

    Preserve dask chunks, avoid fill values in coordinate variables and make
    sure that time & time bounds have the same units.

    Args:
        dataset: Dataset the encoding is prepared for.
        to_engine_kwargs: Keyword arguments for the writing engine. An
            optional ``"encoding"`` entry maps variable names to encoding
            dicts. Neither this dictionary nor the encoding it contains is
            modified.
        numeric_name_prefix: Prefix that was added to variable names starting
            with a digit.

    Returns:
        A tuple ``(encoding, other_to_engine_kwargs)``: the updated encoding
        and a copy of ``to_engine_kwargs`` without the ``"encoding"`` entry.
    """
    other_to_engine_kwargs = to_engine_kwargs.copy()
    # An explicit ``encoding=None`` is treated like a missing encoding.
    user_encoding = other_to_engine_kwargs.pop("encoding", None) or {}
    # Copy the per-variable dicts as well: the helpers below modify them in
    # place, and a shallow copy would leak the computed defaults back into
    # the caller's encoding.
    encoding = {var_name: dict(var_encoding) for var_name, var_encoding in user_encoding.items()}
    encoding = _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix)
    encoding = _set_default_chunks(encoding, dataset)
    encoding = _set_default_fill_value(encoding, dataset)
    encoding = _set_default_time_encoding(encoding, dataset)
    return encoding, other_to_engine_kwargs