Source code for satpy.tests.reader_tests.test_netcdf_utils

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2020 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy.  If not, see <http://www.gnu.org/licenses/>.
"""Module for testing the satpy.readers.netcdf_utils module."""

import os
import unittest

import numpy as np
import pytest

try:
    from satpy.readers.netcdf_utils import NetCDF4FileHandler
except ImportError:
    # fake the import so we can at least run the tests in this file
    NetCDF4FileHandler = object  # type: ignore


[docs] class FakeNetCDF4FileHandler(NetCDF4FileHandler): """Swap-in NetCDF4 File Handler for reader tests to use.""" def __init__(self, filename, filename_info, filetype_info, auto_maskandscale=False, xarray_kwargs=None, cache_var_size=0, cache_handle=False, extra_file_content=None): """Get fake file content from 'get_test_content'.""" # unused kwargs from the real file handler del auto_maskandscale del xarray_kwargs del cache_var_size del cache_handle if NetCDF4FileHandler is object: raise ImportError("Base 'NetCDF4FileHandler' could not be " "imported.") super(NetCDF4FileHandler, self).__init__(filename, filename_info, filetype_info) self.file_content = self.get_test_content(filename, filename_info, filetype_info) if extra_file_content: self.file_content.update(extra_file_content)
[docs] def get_test_content(self, filename, filename_info, filetype_info): """Mimic reader input file content. Args: filename (str): input filename filename_info (dict): Dict of metadata pulled from filename filetype_info (dict): Dict of metadata from the reader's yaml config for this file type Returns: dict of file content with keys like: - 'dataset' - '/attr/global_attr' - 'dataset/attr/global_attr' - 'dataset/shape' - 'dataset/dimensions' - '/dimension/my_dim' """ raise NotImplementedError("Fake File Handler subclass must implement 'get_test_content'")
[docs] class TestNetCDF4FileHandler(unittest.TestCase): """Test NetCDF4 File Handler Utility class."""
[docs] def setUp(self): """Create a test NetCDF4 file.""" from netCDF4 import Dataset with Dataset("test.nc", "w") as nc: # Create dimensions nc.createDimension("rows", 10) nc.createDimension("cols", 100) # Create Group g1 = nc.createGroup("test_group") # Add datasets ds1_f = g1.createVariable("ds1_f", np.float32, dimensions=("rows", "cols")) ds1_f[:] = np.arange(10. * 100).reshape((10, 100)) ds1_i = g1.createVariable("ds1_i", np.int32, dimensions=("rows", "cols")) ds1_i[:] = np.arange(10 * 100).reshape((10, 100)) ds2_f = nc.createVariable("ds2_f", np.float32, dimensions=("rows", "cols")) ds2_f[:] = np.arange(10. * 100).reshape((10, 100)) ds2_i = nc.createVariable("ds2_i", np.int32, dimensions=("rows", "cols")) ds2_i[:] = np.arange(10 * 100).reshape((10, 100)) ds2_s = nc.createVariable("ds2_s", np.int8, dimensions=("rows",)) ds2_s[:] = np.arange(10) ds2_sc = nc.createVariable("ds2_sc", np.int8, dimensions=()) ds2_sc[:] = 42 # Add attributes nc.test_attr_str = "test_string" nc.test_attr_int = 0 nc.test_attr_float = 1.2 nc.test_attr_str_arr = np.array(b"test_string2") g1.test_attr_str = "test_string" g1.test_attr_int = 0 g1.test_attr_float = 1.2 for d in [ds1_f, ds1_i, ds2_f, ds2_i]: d.test_attr_str = "test_string" d.test_attr_int = 0 d.test_attr_float = 1.2
[docs] def tearDown(self): """Remove the previously created test file.""" os.remove("test.nc")
[docs] def test_all_basic(self): """Test everything about the NetCDF4 class.""" import xarray as xr from satpy.readers.netcdf_utils import NetCDF4FileHandler file_handler = NetCDF4FileHandler("test.nc", {}, {}) assert file_handler["/dimension/rows"] == 10 assert file_handler["/dimension/cols"] == 100 for ds in ("test_group/ds1_f", "test_group/ds1_i", "ds2_f", "ds2_i"): assert file_handler[ds].dtype == (np.float32 if ds.endswith("f") else np.int32) assert file_handler[ds + "/shape"] == (10, 100) assert file_handler[ds + "/dimensions"] == ("rows", "cols") assert file_handler[ds + "/attr/test_attr_str"] == "test_string" assert file_handler[ds + "/attr/test_attr_int"] == 0 assert file_handler[ds + "/attr/test_attr_float"] == 1.2 test_group = file_handler["test_group"] assert test_group["ds1_i"].shape == (10, 100) assert test_group["ds1_i"].dims == ("rows", "cols") assert file_handler["/attr/test_attr_str"] == "test_string" assert file_handler["/attr/test_attr_str_arr"] == "test_string2" assert file_handler["/attr/test_attr_int"] == 0 assert file_handler["/attr/test_attr_float"] == 1.2 global_attrs = { "test_attr_str": "test_string", "test_attr_str_arr": "test_string2", "test_attr_int": 0, "test_attr_float": 1.2 } assert file_handler["/attrs"] == global_attrs assert isinstance(file_handler.get("ds2_f")[:], xr.DataArray) assert file_handler.get("fake_ds") is None assert file_handler.get("fake_ds", "test") == "test" assert ("ds2_f" in file_handler) is True assert ("fake_ds" in file_handler) is False assert file_handler.file_handle is None assert file_handler["ds2_sc"] == 42
[docs] def test_listed_variables(self): """Test that only listed variables/attributes area collected.""" from satpy.readers.netcdf_utils import NetCDF4FileHandler filetype_info = { "required_netcdf_variables": [ "test_group/attr/test_attr_str", "attr/test_attr_str", ] } file_handler = NetCDF4FileHandler("test.nc", {}, filetype_info) assert len(file_handler.file_content) == 2 assert "test_group/attr/test_attr_str" in file_handler.file_content assert "attr/test_attr_str" in file_handler.file_content
[docs] def test_listed_variables_with_composing(self): """Test that composing for listed variables is performed.""" from satpy.readers.netcdf_utils import NetCDF4FileHandler filetype_info = { "required_netcdf_variables": [ "test_group/{some_parameter}/attr/test_attr_str", "test_group/attr/test_attr_str", ], "variable_name_replacements": { "some_parameter": [ "ds1_f", "ds1_i", ], "another_parameter": [ "not_used" ], } } file_handler = NetCDF4FileHandler("test.nc", {}, filetype_info) assert len(file_handler.file_content) == 3 assert "test_group/ds1_f/attr/test_attr_str" in file_handler.file_content assert "test_group/ds1_i/attr/test_attr_str" in file_handler.file_content assert not any("not_used" in var for var in file_handler.file_content) assert not any("some_parameter" in var for var in file_handler.file_content) assert not any("another_parameter" in var for var in file_handler.file_content) assert "test_group/attr/test_attr_str" in file_handler.file_content
[docs] def test_caching(self): """Test that caching works as intended.""" from satpy.readers.netcdf_utils import NetCDF4FileHandler h = NetCDF4FileHandler("test.nc", {}, {}, cache_var_size=1000, cache_handle=True) assert h.file_handle is not None assert h.file_handle.isopen() assert sorted(h.cached_file_content.keys()) == ["ds2_s", "ds2_sc"] # with caching, these tests access different lines than without np.testing.assert_array_equal(h["ds2_s"], np.arange(10)) np.testing.assert_array_equal(h["test_group/ds1_i"], np.arange(10 * 100).reshape((10, 100))) # check that root variables can still be read from cached file object, # even if not cached themselves np.testing.assert_array_equal( h["ds2_f"], np.arange(10. * 100).reshape((10, 100))) h.__del__() assert not h.file_handle.isopen()
[docs] def test_filenotfound(self): """Test that error is raised when file not found.""" from satpy.readers.netcdf_utils import NetCDF4FileHandler # NOTE: Some versions of NetCDF C report unknown file format on Windows with pytest.raises(IOError, match=".*(No such file or directory|Unknown file format).*"): NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {})
[docs] def test_get_and_cache_npxr_is_xr(self): """Test that get_and_cache_npxr() returns xr.DataArray.""" import xarray as xr from satpy.readers.netcdf_utils import NetCDF4FileHandler file_handler = NetCDF4FileHandler("test.nc", {}, {}, cache_handle=True) data = file_handler.get_and_cache_npxr("test_group/ds1_f") assert isinstance(data, xr.DataArray)
[docs] def test_get_and_cache_npxr_data_is_cached(self): """Test that the data are cached when get_and_cache_npxr() is called.""" from satpy.readers.netcdf_utils import NetCDF4FileHandler file_handler = NetCDF4FileHandler("test.nc", {}, {}, cache_handle=True) data = file_handler.get_and_cache_npxr("test_group/ds1_f") # Delete the dataset from the file content dict, it should be available from the cache del file_handler.file_content["test_group/ds1_f"] data2 = file_handler.get_and_cache_npxr("test_group/ds1_f") assert np.all(data == data2)
[docs] class TestNetCDF4FsspecFileHandler: """Test the remote reading class."""
[docs] def test_default_to_netcdf4_lib(self): """Test that the NetCDF4 backend is used by default.""" import os import tempfile import h5py from satpy.readers.netcdf_utils import NetCDF4FsspecFileHandler with tempfile.TemporaryDirectory() as tmpdir: # Create an empty HDF5 fname = os.path.join(tmpdir, "test.nc") fid = h5py.File(fname, "w") fid.close() fh = NetCDF4FsspecFileHandler(fname, {}, {}) assert fh._use_h5netcdf is False
[docs] def test_use_h5netcdf_for_file_not_accessible_locally(self): """Test that h5netcdf is used for files that are not accesible locally.""" from unittest.mock import patch fname = "s3://bucket/object.nc" with patch("h5netcdf.File") as h5_file: with patch("satpy.readers.netcdf_utils.open_file_or_filename"): from satpy.readers.netcdf_utils import NetCDF4FsspecFileHandler fh = NetCDF4FsspecFileHandler(fname, {}, {}) h5_file.assert_called_once() assert fh._use_h5netcdf
NC_ATTRS = { "standard_name": "test_data", "scale_factor": 0.01, "add_offset": 0}
[docs] def test_get_data_as_xarray_netcdf4(tmp_path): """Test getting xr.DataArray from netcdf4 variable.""" import numpy as np from satpy.readers.netcdf_utils import get_data_as_xarray data = np.array([1, 2, 3]) fname = tmp_path / "test.nc" dset = _write_test_netcdf4(fname, data) res = get_data_as_xarray(dset["test_data"]) np.testing.assert_equal(res.data, data) assert res.attrs == NC_ATTRS
[docs] def test_get_data_as_xarray_scalar_netcdf4(tmp_path): """Test getting scalar xr.DataArray from netcdf4 variable.""" import numpy as np from satpy.readers.netcdf_utils import get_data_as_xarray data = 1 fname = tmp_path / "test.nc" dset = _write_test_netcdf4(fname, data) res = get_data_as_xarray(dset["test_data"]) np.testing.assert_equal(res.data, np.array(data)) assert res.attrs == NC_ATTRS
[docs] def _write_test_netcdf4(fname, data): import netCDF4 as nc dset = nc.Dataset(fname, "w") try: dset.createDimension("y", data.size) dims = ("y",) except AttributeError: dims = () var = dset.createVariable("test_data", "uint8", dims) var[:] = data var.setncatts(NC_ATTRS) # Turn off automatic scale factor and offset handling dset.set_auto_maskandscale(False) return dset
[docs] def test_get_data_as_xarray_h5netcdf(tmp_path): """Test getting xr.DataArray from h5netcdf variable.""" import numpy as np from satpy.readers.netcdf_utils import get_data_as_xarray data = np.array([1, 2, 3]) fname = tmp_path / "test.nc" fid = _write_test_h5netcdf(fname, data) res = get_data_as_xarray(fid["test_data"]) np.testing.assert_equal(res.data, data) assert res.attrs == NC_ATTRS
[docs] def _write_test_h5netcdf(fname, data): import h5netcdf fid = h5netcdf.File(fname, "w") try: fid.dimensions = {"y": data.size} dims = ("y",) except AttributeError: dims = () var = fid.create_variable("test_data", dims, "uint8", data=data) for key in NC_ATTRS: var.attrs[key] = NC_ATTRS[key] return fid
[docs] def test_get_data_as_xarray_scalar_h5netcdf(tmp_path): """Test getting xr.DataArray from h5netcdf variable.""" import numpy as np from satpy.readers.netcdf_utils import get_data_as_xarray data = 1 fname = tmp_path / "test.nc" fid = _write_test_h5netcdf(fname, data) res = get_data_as_xarray(fid["test_data"]) np.testing.assert_equal(res.data, np.array(data)) assert res.attrs == NC_ATTRS