#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Modis level 2 hdf-eos format reader.
Introduction
------------
The ``modis_l2`` reader reads and calibrates Modis L2 image data in hdf-eos format.
Since there is a multitude of different level 2 datasets, not all of these are implemented (yet).
Currently the reader supports:
- m[o/y]d35_l2: cloud_mask dataset
- some datasets in m[o/y]d06 files
Additionally, the reader tries to add non-YAML-configured 2D datasets dynamically. As mentioned above, there are a lot
of different level 2 datasets, so this might not work in every case (for example, bit-encoded datasets similar to the
supported m[o/y]d35_l2 cloud mask are not decoded).
To get a list of the available datasets for a given file refer to the :ref:`reading:available datasets` section.
Geolocation files
-----------------
Similar to the ``modis_l1b`` reader the geolocation files (mod03) for the 1km data are optional and if not
given 1km geolocations will be interpolated from the 5km geolocation contained within the file.
For the 500m and 250m data geolocation files are needed.
References:
- Documentation about the format: https://modis-atmos.gsfc.nasa.gov/products
"""
import logging
import dask.array as da
import numpy as np
import xarray as xr
from satpy.readers.core.hdf4 import from_sds
from satpy.readers.core.hdfeos import HDFEOSGeoReader
from satpy.utils import get_legacy_chunk_size
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Chunk size handed to ``from_sds`` when datasets are read from the HDF file.
CHUNK_SIZE = get_legacy_chunk_size()
class ModisL2HDFFileHandler(HDFEOSGeoReader):
    """File handler for MODIS HDF-EOS Level 2 files.

    Includes error handling for files produced by IMAPP.

    """

    @property
    def is_imapp_mask_byte1(self):
        """Get if this file is the IMAPP 'mask_byte1' file type."""
        return "mask_byte1" in self.filetype_info["file_type"]

    @property
    def start_time(self):
        """Get the start time of the dataset.

        Falls back to the ``start_time`` entry and then to the
        ``acquisition_time`` entry of ``filename_info`` when the parent
        class raises ``KeyError``.

        """
        try:
            return super().start_time
        except KeyError:
            try:
                return self.filename_info["start_time"]
            except KeyError:
                return self.filename_info["acquisition_time"]

    @property
    def end_time(self):
        """Get the end time of the dataset.

        Falls back to :attr:`start_time` when the parent class raises
        ``KeyError``.

        """
        try:
            return super().end_time
        except KeyError:
            return self.start_time

    @staticmethod
    def read_geo_resolution(metadata):
        """Parse metadata to find the geolocation resolution.

        It is implemented as a staticmethod to match read_mda pattern.

        Returns the value found by the parent implementation, or ``5000``
        when the parent raises ``RuntimeError``.

        """
        try:
            return HDFEOSGeoReader.read_geo_resolution(metadata)
        except RuntimeError:
            # most L2 products are 5000m
            return 5000

    def _select_hdf_dataset(self, hdf_dataset_name, byte_dimension):
        """Load a dataset from HDF-EOS level 2 file.

        Args:
            hdf_dataset_name:
                Name of the dataset to select from the opened HDF file.
            byte_dimension:
                Index of the per-byte dimension (``0`` or ``2``). Any other
                value, including ``None``, means the data is treated as
                plain ``("y", "x")`` and is not cast.

        Returns:
            A DataArray carrying the attributes of the HDF dataset. When a
            byte dimension is present it is named ``"i"``, is moved to the
            front and the data is cast to ``uint8``.

        """
        dataset = self.sd.select(hdf_dataset_name)
        dask_arr = from_sds(dataset, self.filename, chunks=CHUNK_SIZE)
        attrs = dataset.attributes()
        dims = ["y", "x"]
        if byte_dimension == 0:
            dims = ["i", "y", "x"]
            dask_arr = dask_arr.astype(np.uint8)
        elif byte_dimension == 2:
            dims = ["y", "x", "i"]
            dask_arr = dask_arr.astype(np.uint8)
        dataset = xr.DataArray(dask_arr, dims=dims, attrs=attrs)
        if "i" in dataset.dims:
            # Reorder dimensions for consistency
            dataset = dataset.transpose("i", "y", "x")
        return dataset

    def get_dataset(self, dataset_id, dataset_info):
        """Get DataArray for specified dataset.

        Geolocation datasets are delegated to the parent class. Datasets
        whose info contains both ``byte`` and ``byte_dimension`` are
        extracted from a bit-encoded variable; everything else is loaded
        with ``load_dataset``.

        ``dataset_info`` is only read, never modified, so calling this
        method repeatedly with the same dictionary gives the same result.

        """
        dataset_name = dataset_id["name"]
        if self.is_geo_loadable_dataset(dataset_name):
            return HDFEOSGeoReader.get_dataset(self, dataset_id, dataset_info)
        dataset_name_in_file = dataset_info["file_key"]
        if self.is_imapp_mask_byte1:
            dataset_name_in_file = dataset_info.get("imapp_file_key", dataset_name_in_file)

        # The dataset asked correspond to a given set of bits of the HDF EOS dataset
        if "byte" in dataset_info and "byte_dimension" in dataset_info:
            dataset = self._extract_and_mask_category_dataset(dataset_id, dataset_info, dataset_name_in_file)
        else:
            # No byte manipulation required.
            # Use ``get`` rather than ``pop``: removing the key from the
            # caller's dictionary would make a later call with the same
            # dictionary lose the "category" flag.
            dataset = self.load_dataset(dataset_name_in_file, dataset_info.get("category", False))

        self._add_satpy_metadata(dataset_id, dataset)
        return dataset

    def available_datasets(self, configured_datasets):
        """Add dataset information from arbitrary level 2 files.

        Adds dataset information not specifically specified in reader yaml file
        from arbitrary modis level 2 product files to available datasets.

        Notes:
            Currently only adds 2D datasets and does not decode bit encoded information.

        """
        # pass along yaml configured (handled) datasets and collect their file keys to check against dynamically
        # collected variables later on.
        handled = set()
        for is_avail, ds_info in (configured_datasets or []):
            file_key = ds_info.get("file_key", ds_info["name"])
            handled.add(file_key)

            if is_avail is not None:
                # availability was already decided upstream; pass it along unchanged
                yield is_avail, ds_info
                continue
            yield self.file_type_matches(ds_info["file_type"]), ds_info

        # Maps the size of a variable's last dimension to a resolution;
        # variables with any other size are not added.
        res_dict = {5416: 250, 2708: 500, 1354: 1000, 270: 5000, 135: 10000}

        # get variables from file dynamically and only add those which are not already configured in yaml
        # NOTE(review): val[0] / val[1] are presumably the dimension names and
        # the shape as returned by pyhdf's ``SD.datasets()`` -- confirm.
        for var_name, val in self.sd.datasets().items():
            if var_name in handled:
                continue
            if len(val[0]) != 2:
                continue
            resolution = res_dict.get(val[1][-1])
            if resolution is not None:
                ds_info = {
                    "file_type": self.filetype_info["file_type"],
                    "resolution": resolution,
                    "name": var_name,
                    "file_key": var_name,
                    "coordinates": ["longitude", "latitude"]
                }
                yield True, ds_info

    def _extract_and_mask_category_dataset(self, dataset_id, dataset_info, var_name):
        """Load a bit-encoded variable and extract the configured bits.

        For IMAPP 'mask_byte1' files the variable is returned as loaded,
        without any bit extraction or quality assurance masking. Otherwise
        the bits described by the ``byte``, ``bit_start`` and ``bit_count``
        entries of ``dataset_info`` are extracted with the module-level
        ``_extract_byte_mask`` helper and the result is optionally masked
        with the quality assurance flag.

        """
        # what dimension is per-byte
        byte_dimension = None if self.is_imapp_mask_byte1 else dataset_info["byte_dimension"]
        dataset = self._select_hdf_dataset(var_name, byte_dimension)
        # category products always have factor=1/offset=0 so don't apply them
        # also remove them so they don't screw up future satpy processing
        dataset.attrs.pop("scale_factor", None)
        dataset.attrs.pop("add_offset", None)
        # Don't do this byte work if we are using the IMAPP mask_byte1 file
        if self.is_imapp_mask_byte1:
            return dataset

        dataset = _extract_byte_mask(dataset,
                                     dataset_info["byte"],
                                     dataset_info["bit_start"],
                                     dataset_info["bit_count"])
        dataset = self._mask_with_quality_assurance_if_needed(dataset, dataset_info, dataset_id)
        return dataset

    def _mask_with_quality_assurance_if_needed(self, dataset, dataset_info, dataset_id):
        """Replace values that are not quality assured with the fill value.

        Does nothing unless ``dataset_info`` has a truthy
        ``quality_assurance`` entry. Otherwise the first bit of byte 0 of
        the ``Quality_Assurance`` variable is loaded and every element
        where that bit is 0 is replaced by ``dataset.attrs["_FillValue"]``.

        """
        if not dataset_info.get("quality_assurance", False):
            return dataset

        # Get quality assurance dataset recursively
        quality_assurance_dataset_id = dataset_id.from_dict(
            dict(name="quality_assurance", resolution=1000)
        )
        quality_assurance_dataset_info = {
            "name": "quality_assurance",
            "resolution": 1000,
            "byte_dimension": 2,
            "byte": 0,
            "bit_start": 0,
            "bit_count": 1,
            "file_key": "Quality_Assurance"
        }
        quality_assurance = self.get_dataset(
            quality_assurance_dataset_id, quality_assurance_dataset_info
        )
        # Duplicate quality assurance dataset to create relevant filter.
        # Integer division keeps the factors exact integers.
        duplication_factor = [dataset_dim // quality_assurance_dim
                              for dataset_dim, quality_assurance_dim
                              in zip(dataset.shape, quality_assurance.shape)]
        quality_assurance = np.tile(quality_assurance, duplication_factor)
        # Replace unassured data with the dataset's fill value
        dataset = dataset.where(quality_assurance != 0, dataset.attrs["_FillValue"])
        return dataset
def _bits_strip(bit_start: int, bit_count: int, value: int) -> int:
    """Extract specified bit from bit representation of integer value.

    The extraction is done with NumPy ufuncs, so ``value`` may also be an
    integer array, in which case the bits are extracted element-wise.

    Args:
        bit_start:
            Starting index of the bits to extract (first bit has index 0)
        bit_count:
            Number of bits starting from bit_start to extract
        value:
            Number from which to extract the bits

    Returns:
        Value of the extracted bits

    """
    # Keep the lowest (bit_start + bit_count) bits, then shift out the
    # bit_start bits below the requested range.
    bit_mask = pow(2, bit_start + bit_count) - 1
    return np.right_shift(np.bitwise_and(value, bit_mask), bit_start)