Source code for satpy.demo._google_cloud_platform

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy.  If not, see <http://www.gnu.org/licenses/>.

import logging
import os
from urllib.error import URLError
from urllib.request import urlopen

try:
    import gcsfs
except ImportError:
    gcsfs = None

LOG = logging.getLogger(__name__)


def is_google_cloud_instance():
    """Check if we are on a GCP virtual machine."""
    try:
        return urlopen("http://metadata.google.internal").headers.get("Metadata-Flavor") == "Google"  # nosec
    except URLError:
        return False
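

# A minimal usage sketch (not part of the satpy source): the metadata-server
# check above lets demo code decide whether data has to travel over the public
# internet. The helper name and log messages below are hypothetical.
def _example_report_environment():
    """Log where demo data will come from (illustrative only)."""
    if is_google_cloud_instance():
        LOG.info("Running on a GCP VM; bucket access stays within Google Cloud.")
    else:
        LOG.info("Not on GCP; demo files will be downloaded anonymously.")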


def get_bucket_files(glob_pattern, base_dir, force=False, pattern_slice=None):
    """Download files from Google Cloud Storage.

    Args:
        glob_pattern (str or list): Glob pattern string or series of patterns
            used to search for on Google Cloud Storage. The pattern should
            include the "gs://" protocol prefix. If a list of lists, then the
            results of each sublist pattern are concatenated and the result is
            treated as one pattern result. This is important for things like
            ``pattern_slice`` and complicated glob patterns not supported by
            GCP.
        base_dir (str): Root directory to place downloaded files on the local
            system.
        force (bool): Force re-download of data regardless of its existence on
            the local system. Warning: May delete non-demo files stored in
            download directory.
        pattern_slice (slice): Slice object to limit the number of files
            returned by each glob pattern.

    """
    if pattern_slice is None:
        pattern_slice = slice(None)
    if gcsfs is None:
        raise RuntimeError("Missing 'gcsfs' dependency for GCS download.")
    if not os.path.isdir(base_dir):
        # it is the caller's responsibility to make this
        raise OSError("Directory does not exist: {}".format(base_dir))

    if isinstance(glob_pattern, str):
        glob_pattern = [glob_pattern]

    fs = gcsfs.GCSFileSystem(token="anon")  # nosec
    filenames = []
    for gp in glob_pattern:
        # handle multiple glob patterns being treated as one pattern
        # for complicated patterns that GCP can't handle
        if isinstance(gp, str):
            glob_results = list(fs.glob(gp))
        else:
            # flat list of results
            glob_results = [fn for pat in gp for fn in fs.glob(pat)]
        filenames.extend(_download_gcs_files(glob_results[pattern_slice], fs, base_dir, force))
    if not filenames:
        raise OSError("No files could be found or downloaded.")
    return filenames
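

# A usage sketch for get_bucket_files() (not part of the satpy source). The
# bucket is Google's public GOES-16 archive, which other satpy demo helpers
# also read from, but the exact glob pattern, date, and target directory are
# hypothetical examples.
def _example_download_two_files():
    """Fetch the first two matching ABI channel-1 files (illustrative only)."""
    base_dir = os.path.join(os.getcwd(), "demo_data")
    os.makedirs(base_dir, exist_ok=True)  # get_bucket_files() expects base_dir to exist
    return get_bucket_files(
        "gs://gcp-public-data-goes-16/ABI-L1b-RadC/2019/001/00/*C01*.nc",
        base_dir,
        pattern_slice=slice(0, 2),  # keep only the first two glob matches
    )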


def _download_gcs_files(globbed_files, fs, base_dir, force):
    """Download the given GCS files to ``base_dir``.

    Existing local copies are reused unless ``force`` is True, in which case
    they are deleted and downloaded again.
    """
    filenames = []
    for fn in globbed_files:
        ondisk_fn = os.path.basename(fn)
        ondisk_pathname = os.path.join(base_dir, ondisk_fn)
        filenames.append(ondisk_pathname)

        if force and os.path.isfile(ondisk_pathname):
            os.remove(ondisk_pathname)
        elif os.path.isfile(ondisk_pathname):
            LOG.info("Found existing: {}".format(ondisk_pathname))
            continue
        LOG.info("Downloading: {}".format(ondisk_pathname))
        fs.get("gs://" + fn, ondisk_pathname)
    return filenames