Source code for satpy.readers.hrit_jma

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2010-2017 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy.  If not, see <http://www.gnu.org/licenses/>.
"""HRIT format reader for JMA data.

Introduction
------------
The JMA HRIT format is described in the `JMA HRIT - Mission Specific
Implementation`_. There are three readers for this format in Satpy:

- ``jami_hrit``: For data from the `JAMI` instrument on MTSAT-1R
- ``mtsat2-imager_hrit``: For data from the `Imager` instrument on MTSAT-2
- ``ahi_hrit``: For data from the `AHI` instrument on Himawari-8/9

Although the data format is identical, the instruments have different
characteristics, which is why there is a dedicated reader for each of them.
Sample data is available here:

- `JAMI/Imager sample data`_
- `AHI sample data`_


Example:
--------
Here is an example how to read Himwari-8 HRIT data with Satpy:

.. code-block:: python

    from satpy import Scene
    import glob

    filenames = glob.glob('data/IMG_DK01B14_2018011109*')
    scn = Scene(filenames=filenames, reader='ahi_hrit')
    scn.load(['B14'])
    print(scn['B14'])


Output:

.. code-block:: none

    <xarray.DataArray (y: 5500, x: 5500)>
    dask.array<concatenate, shape=(5500, 5500), dtype=float64, chunksize=(550, 4096), ...
    Coordinates:
        acq_time  (y) datetime64[ns] 2018-01-11T09:00:20.995200 ... 2018-01-11T09:09:40.348800
        crs       object +proj=geos +lon_0=140.7 +h=35785831 +x_0=0 +y_0=0 +a=6378169 ...
      * y         (y) float64 5.5e+06 5.498e+06 5.496e+06 ... -5.496e+06 -5.498e+06
      * x         (x) float64 -5.498e+06 -5.496e+06 -5.494e+06 ... 5.498e+06 5.5e+06
    Attributes:
        orbital_parameters:   {'projection_longitude': 140.7, 'projection_latitud...
        standard_name:        toa_brightness_temperature
        level:                None
        wavelength:           (11.0, 11.2, 11.4)
        units:                K
        calibration:          brightness_temperature
        file_type:            ['hrit_b14_seg', 'hrit_b14_fd']
        modifiers:            ()
        polarization:         None
        sensor:               ahi
        name:                 B14
        platform_name:        Himawari-8
        resolution:           4000
        start_time:           2018-01-11 09:00:20.995200
        end_time:             2018-01-11 09:09:40.348800
        area:                 Area ID: FLDK, Description: Full Disk, Projection I...
        ancillary_variables:  []

JMA HRIT data contain the scanline acquisition time for only a subset of scanlines. Timestamps of
the remaining scanlines are computed using linear interpolation. This is what you'll find in the
``acq_time`` coordinate of the dataset.

Compression
-----------

Gzip-compressed MTSAT files can be decompressed on the fly using
:class:`~satpy.readers.FSFile`:

.. code-block:: python

    import fsspec
    from satpy import Scene
    from satpy.readers import FSFile

    filename = "/data/HRIT_MTSAT1_20090101_0630_DK01IR1.gz"
    open_file = fsspec.open(filename, compression="gzip")
    fs_file = FSFile(open_file)
    scn = Scene([fs_file], reader="jami_hrit")
    scn.load(["IR1"])


.. _JMA HRIT - Mission Specific Implementation: http://www.jma.go.jp/jma/jma-eng/satellite/introduction/4_2HRIT.pdf
.. _JAMI/Imager sample data: https://www.data.jma.go.jp/mscweb/en/operation/hrit_sample.html
.. _AHI sample data: https://www.data.jma.go.jp/mscweb/en/himawari89/space_segment/sample_hrit.html
"""

import logging
from datetime import datetime

import numpy as np
import xarray as xr

from satpy.readers._geos_area import get_area_definition, get_area_extent
from satpy.readers.hrit_base import (
    HRITFileHandler,
    ancillary_text,
    annotation_header,
    base_hdr_map,
    image_data_function,
)
from satpy.readers.utils import get_geostationary_mask

logger = logging.getLogger("hrit_jma")


# JMA implementation:
key_header = np.dtype([("key_number", "u4")])

segment_identification = np.dtype([("image_segm_seq_no", ">u1"),
                                   ("total_no_image_segm", ">u1"),
                                   ("line_no_image_segm", ">u2")])

encryption_key_message = np.dtype([("station_number", ">u2")])

image_compensation_information = np.dtype([("compensation", "|S1")])

image_observation_time = np.dtype([("times", "|S1")])

image_quality_information = np.dtype([("quality", "|S1")])


jma_variable_length_headers: dict = {}

jma_text_headers = {image_data_function: "image_data_function",
                    annotation_header: "annotation_header",
                    ancillary_text: "ancillary_text",
                    image_compensation_information: "image_compensation_information",
                    image_observation_time: "image_observation_time",
                    image_quality_information: "image_quality_information"}

jma_hdr_map = base_hdr_map.copy()
jma_hdr_map.update({7: key_header,
                    128: segment_identification,
                    129: encryption_key_message,
                    130: image_compensation_information,
                    131: image_observation_time,
                    132: image_quality_information
                    })


cuc_time = np.dtype([("coarse", "u1", (4, )),
                     ("fine", "u1", (3, ))])

time_cds_expanded = np.dtype([("days", ">u2"),
                              ("milliseconds", ">u4"),
                              ("microseconds", ">u2"),
                              ("nanoseconds", ">u2")])

FULL_DISK = 1
NORTH_HEMIS = 2
SOUTH_HEMIS = 3
UNKNOWN_AREA = -1
AREA_NAMES = {FULL_DISK: {"short": "FLDK", "long": "Full Disk"},
              NORTH_HEMIS: {"short": "NH", "long": "Northern Hemisphere"},
              SOUTH_HEMIS: {"short": "SH", "long": "Southern Hemisphere"},
              UNKNOWN_AREA: {"short": "UNKNOWN", "long": "Unknown Area"}}

MTSAT1R = "MTSAT-1R"
MTSAT2 = "MTSAT-2"
HIMAWARI8 = "Himawari-8"
UNKNOWN_PLATFORM = "Unknown Platform"
PLATFORMS = {
    "GEOS(140.00)": MTSAT1R,
    "GEOS(140.25)": MTSAT1R,
    "GEOS(140.70)": HIMAWARI8,
    "GEOS(145.00)": MTSAT2,
}
SENSORS = {
    MTSAT1R: "jami",
    MTSAT2: "mtsat2_imager",
    HIMAWARI8: "ahi"
}



[docs]
def mjd2datetime64(mjd):
    """Convert Modified Julian Day (MJD) to datetime64."""
    epoch = np.datetime64("1858-11-17 00:00")
    day2nsec = 24 * 3600 * 1E9
    mjd_nsec = (mjd * day2nsec).astype(np.int64).astype("timedelta64[ns]")

    return epoch + mjd_nsec




[docs]
class HRITJMAFileHandler(HRITFileHandler):
    """JMA HRIT format reader.

    By default, the reader uses the start time parsed from the filename. To use exact time, computed
    from the metadata, the user can define a keyword argument::

        scene = Scene(filenames=filenames,
                      reader='ahi_hrit',
                      reader_kwargs={'use_acquisition_time_as_start_time': True})

    As this time is different for every channel, time-dependent calculations like SZA correction
    can be pretty slow when multiple channels are used.

    The exact scanline times are always available as coordinates of an individual channels::

        scene.load(["B03"])
        print(scene["B03].coords["acq_time"].data)

    would print something similar to::

        array(['2021-12-08T06:00:20.131200000', '2021-12-08T06:00:20.191948000',
               '2021-12-08T06:00:20.252695000', ...,
               '2021-12-08T06:09:39.449390000', '2021-12-08T06:09:39.510295000',
               '2021-12-08T06:09:39.571200000'], dtype='datetime64[ns]')

    The first value represents the exact start time, and the last one the exact end time of the data
    acquisition.

    """

    def __init__(self, filename, filename_info, filetype_info, use_acquisition_time_as_start_time=False):
        """Initialize the reader."""
        super(HRITJMAFileHandler, self).__init__(filename, filename_info,
                                                 filetype_info,
                                                 (jma_hdr_map,
                                                  jma_variable_length_headers,
                                                  jma_text_headers))

        self._use_acquisition_time_as_start_time = use_acquisition_time_as_start_time
        self.mda["segment_sequence_number"] = self.mda["image_segm_seq_no"]
        self.mda["planned_end_segment_number"] = self.mda["total_no_image_segm"]
        self.mda["planned_start_segment_number"] = 1

        items = self.mda["image_data_function"].decode().split("\r")
        if items[0].startswith("$HALFTONE"):
            self.calibration_table = []
            for item in items[1:]:
                if item == "":
                    continue
                key, value = item.split(":=")
                if key.startswith("_UNIT"):
                    self.mda["unit"] = item.split(":=")[1]
                elif key.startswith("_NAME"):
                    pass
                elif key.isdigit():
                    key = int(key)
                    value = float(value)
                    self.calibration_table.append((key, value))

            self.calibration_table = np.array(self.calibration_table)

        self.projection_name = self.mda["projection_name"].decode().strip()
        sublon = float(self.projection_name.split("(")[1][:-1])
        self.mda["projection_parameters"]["SSP_longitude"] = sublon
        self.platform = self._get_platform()
        self.is_segmented = self.mda["segment_sequence_number"] > 0
        self.area_id = filename_info.get("area", UNKNOWN_AREA)
        if self.area_id not in AREA_NAMES:
            self.area_id = UNKNOWN_AREA
        self.area = self._get_area_def()
        self.acq_time = self._get_acq_time()


[docs]
    def _get_platform(self):
        """Get the platform name.

        The platform is not specified explicitly in JMA HRIT files. For
        segmented data it is not even specified in the filename. But it
        can be derived indirectly from the projection name:

            GEOS(140.00): MTSAT-1R
            GEOS(140.25): MTSAT-1R    # TODO: Check if there is more...
            GEOS(140.70): Himawari-8
            GEOS(145.00): MTSAT-2

        See [MTSAT], section 3.1. Unfortunately Himawari-8 and 9 are not
        distinguishable using that method at the moment. From [HIMAWARI]:

        "HRIT/LRIT files have the same file naming convention in the same
        format in Himawari-8 and Himawari-9, so there is no particular
        difference."

        TODO: Find another way to distinguish Himawari-8 and 9.

        References:
        [MTSAT] http://www.data.jma.go.jp/mscweb/notice/Himawari7_e.html
        [HIMAWARI] http://www.data.jma.go.jp/mscweb/en/himawari89/space_segment/sample_hrit.html

        """
        try:
            return PLATFORMS[self.projection_name]
        except KeyError:
            logger.error("Unable to determine platform: Unknown projection "
                         'name "{}"'.format(self.projection_name))
            return UNKNOWN_PLATFORM



[docs]
    def _check_sensor_platform_consistency(self, sensor):
        """Make sure sensor and platform are consistent.

        Args:
            sensor (str) : Sensor name from YAML dataset definition

        Raises:
            ValueError if they don't match

        """
        ref_sensor = SENSORS.get(self.platform, None)
        if ref_sensor and not sensor == ref_sensor:
            logger.error("Sensor-Platform mismatch: {} is not a payload "
                         "of {}. Did you choose the correct reader?"
                         .format(sensor, self.platform))



[docs]
    def _get_line_offset(self):
        """Get line offset for the current segment.

        Read line offset from the file and adapt it to the current segment
        or half disk scan so that

            y(l) ~ l - loff

        because this is what get_geostationary_area_extent() expects.
        """
        # Get line offset from the file
        nlines = int(self.mda["number_of_lines"])
        loff = np.float32(self.mda["loff"])

        # Adapt it to the current segment
        if self.is_segmented:
            # loff in the file specifies the offset of the full disk image
            # centre (1375/2750 for VIS/IR)
            segment_number = self.mda["segment_sequence_number"] - 1
            loff -= (self.mda["total_no_image_segm"] - segment_number - 1) * nlines
        elif self.area_id in (NORTH_HEMIS, SOUTH_HEMIS):
            # loff in the file specifies the start line of the half disk image
            # in the full disk image
            loff = nlines - loff
        elif self.area_id == UNKNOWN_AREA:
            logger.error("Cannot compute line offset for unknown area")

        return loff



[docs]
    def _get_area_def(self):
        """Get the area definition of the band."""
        pdict = {
            "cfac": np.int32(self.mda["cfac"]),
            "lfac": np.int32(self.mda["lfac"]),
            "coff": np.float32(self.mda["coff"]),
            "loff": self._get_line_offset(),
            "ncols": int(self.mda["number_of_columns"]),
            "nlines": int(self.mda["number_of_lines"]),
            "scandir": "N2S",
            "a": float(self.mda["projection_parameters"]["a"]),
            "b": float(self.mda["projection_parameters"]["b"]),
            "h": float(self.mda["projection_parameters"]["h"]),
            "ssp_lon": float(self.mda["projection_parameters"]["SSP_longitude"]),
            "a_name": AREA_NAMES[self.area_id]["short"],
            "a_desc": AREA_NAMES[self.area_id]["long"],
            "p_id": "geosmsg"
        }
        area_extent = get_area_extent(pdict)
        return get_area_definition(pdict, area_extent)



[docs]
    def get_area_def(self, dsid):
        """Get the area definition of the band."""
        return self.area



[docs]
    def get_dataset(self, key, info):
        """Get the dataset designated by *key*."""
        res = super(HRITJMAFileHandler, self).get_dataset(key, info)

        # Filenames of segmented data is identical for MTSAT-1R, MTSAT-2
        # and Himawari-8/9. Make sure we have the correct reader for the data
        # at hand.
        self._check_sensor_platform_consistency(info["sensor"])

        # Calibrate and mask space pixels
        res = self._mask_space(self.calibrate(res, key["calibration"]))

        # Add scanline acquisition time
        res.coords["acq_time"] = ("y", self.acq_time)
        res.coords["acq_time"].attrs["long_name"] = "Scanline acquisition time"

        # Update attributes
        res.attrs.update(info)
        res.attrs["platform_name"] = self.platform
        res.attrs["orbital_parameters"] = {
            "projection_longitude": float(self.mda["projection_parameters"]["SSP_longitude"]),
            "projection_latitude": 0.,
            "projection_altitude": float(self.mda["projection_parameters"]["h"])}

        return res



[docs]
    def _mask_space(self, data):
        """Mask space pixels."""
        geomask = get_geostationary_mask(area=self.area)
        return data.where(geomask)



[docs]
    def _get_acq_time(self):
        r"""Get the acquisition times from the file.

        Acquisition times for a subset of scanlines are stored in the header
        as follows:

        b'LINE:=1\rTIME:=54365.022558\rLINE:=21\rTIME:=54365.022664\r...'

        Missing timestamps in between are computed using linear interpolation.
        """
        buf_b = np.frombuffer(self.mda["image_observation_time"],
                              dtype=image_observation_time)

        # Replace \r by \n before encoding, otherwise encoding will drop all
        # elements except the last one
        buf_s = b"".join(buf_b["times"]).replace(b"\r", b"\n").decode()

        # Split into key:=value pairs; then extract line number and timestamp
        splits = buf_s.strip().split("\n")
        lines_sparse = [int(s.split(":=")[1]) for s in splits[0::2]]
        times_sparse = [float(s.split(":=")[1]) for s in splits[1::2]]

        if self.platform == HIMAWARI8:
            # Only a couple of timestamps in the header, and only the first
            # and last are usable (duplicates inbetween).
            lines_sparse = [lines_sparse[0], lines_sparse[-1]]
            times_sparse = [times_sparse[0], times_sparse[-1]]

        # Compute missing timestamps using linear interpolation.
        lines = np.arange(lines_sparse[0], lines_sparse[-1]+1)
        times = np.interp(lines, lines_sparse, times_sparse)

        # Convert to np.datetime64
        times64 = mjd2datetime64(times)

        return times64



[docs]
    @staticmethod
    def _interp(arr, cal):
        return np.interp(arr.ravel(), cal[:, 0], cal[:, 1]).reshape(arr.shape)



[docs]
    def calibrate(self, data, calibration):
        """Calibrate the data."""
        tic = datetime.now()

        if calibration == "counts":
            return data
        if calibration == "radiance":
            raise NotImplementedError("Can't calibrate to radiance.")

        cal = self.calibration_table
        res = data.data.map_blocks(self._interp, cal, dtype=cal[:, 0].dtype)
        res = xr.DataArray(res,
                           dims=data.dims, attrs=data.attrs,
                           coords=data.coords)
        res = res.where(data < 65535)
        logger.debug("Calibration time " + str(datetime.now() - tic))
        return res


    @property
    def start_time(self):
        """Get start time of the scan."""
        if self._use_acquisition_time_as_start_time:
            return self.acq_time[0].astype(datetime)
        return self._start_time

    @property
    def end_time(self):
        """Get end time of the scan."""
        return self.acq_time[-1].astype(datetime)