"""Source code for amocatlas.read_rapid.

Reads RAPID-AMOC 26°N array NetCDF files into xarray datasets.
"""

from pathlib import Path
from typing import Union

import xarray as xr

# Import the modules used
from amocatlas import logger, utilities
from amocatlas.logger import log_error, log_info, log_warning
from amocatlas.utilities import apply_defaults

# Module-level alias for the logger object exposed by ``amocatlas.logger``.
# NOTE(review): the visible code in this module logs through log_info /
# log_warning / log_error rather than through this alias — confirm whether
# other modules import ``log`` from here before removing it.
log = logger.log  # Use the global logger

# Base URL under which the RAPID NetCDF files are published.
RAPID_DEFAULT_SOURCE = "https://rapid.ac.uk/sites/default/files/rapid_data/"

# Files loaded when only the transport product is requested.
RAPID_TRANSPORT_FILES = ["moc_transports.nc"]

# Full default file set: the transport file(s) first, then the remaining
# products. Concatenation builds a new list, so the two constants never share
# the same list object.
RAPID_DEFAULT_FILES = RAPID_TRANSPORT_FILES + [
    "moc_vertical.nc",
    "ts_gridded.nc",
    "2d_gridded.nc",
    "meridional_transports.nc",
]

# Dataset-level attributes attached to every RAPID dataset that read_rapid
# loads, regardless of which file it came from.
RAPID_METADATA = dict(
    description="RAPID 26N transport estimates dataset",
    project="RAPID-AMOC 26°N array",
    web_link="https://rapid.ac.uk/rapidmoc",
    note="Dataset accessed and processed via xarray",
)

# Per-file attributes, keyed by file name. Each entry currently holds only a
# "data_product" label; read_rapid looks the file up here and falls back to an
# empty dict for unknown names.
RAPID_FILE_METADATA = {
    file_name: {"data_product": data_product}
    for file_name, data_product in (
        ("moc_transports.nc", "RAPID layer transport time series"),
        ("moc_vertical.nc", "RAPID vertical streamfunction time series"),
        ("ts_gridded.nc", "RAPID gridded temperature and salinity"),
        ("2d_gridded.nc", "RAPID 2D gridded temperature and salinity"),
        ("meridional_transports.nc", "RAPID meridional transport time series"),
    )
}
# Full download URLs of the default RAPID files, for reference:
# https://rapid.ac.uk/sites/default/files/rapid_data/ts_gridded.nc
# https://rapid.ac.uk/sites/default/files/rapid_data/moc_vertical.nc
# https://rapid.ac.uk/sites/default/files/rapid_data/moc_transports.nc
# https://rapid.ac.uk/sites/default/files/rapid_data/2d_gridded.nc
# https://rapid.ac.uk/sites/default/files/rapid_data/meridional_transports.nc


@apply_defaults(RAPID_DEFAULT_SOURCE, RAPID_DEFAULT_FILES)
def read_rapid(
    source: Union[str, Path, None],
    file_list: Union[str, list[str], None],
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
) -> list[xr.Dataset]:
    """Load RAPID NetCDF files from a URL or local path into xarray datasets.

    Parameters
    ----------
    source : str, Path or None
        URL or local path to the NetCDF file(s). The ``apply_defaults``
        decorator is given ``RAPID_DEFAULT_SOURCE``; presumably it substitutes
        that value when ``source`` is None (confirm against the decorator).
    file_list : str, list of str or None
        Filename or list of filenames to process. A single string is wrapped
        in a list. If None, ``RAPID_DEFAULT_FILES`` is used. Entries that do
        not end in ``.nc`` (case-insensitive) are skipped with a warning.
    transport_only : bool, optional
        If True (the default), ``file_list`` is replaced by
        ``RAPID_TRANSPORT_FILES`` regardless of what was passed. Pass
        ``transport_only=False`` to load the files named in ``file_list``.
    data_dir : str, Path or None, optional
        Local data directory handed to ``utilities.resolve_file_path``. When
        falsy, ``utilities.get_default_data_dir()`` is used. The directory is
        created if it does not exist.
    redownload : bool, optional
        If True, force redownload of the data (forwarded unchanged to
        ``utilities.resolve_file_path``).

    Returns
    -------
    list of xr.Dataset
        One dataset per NetCDF file processed, in ``file_list`` order. Each
        dataset's attributes are updated through
        ``utilities.safe_update_attrs`` with ``source_file``, ``source_path``,
        the entries of ``RAPID_METADATA`` and any file-specific entries from
        ``RAPID_FILE_METADATA``.

    Raises
    ------
    ValueError
        Not raised directly here; presumably propagated from
        ``utilities.resolve_file_path`` when the source is neither a valid URL
        nor a directory path (confirm against that helper).
    FileNotFoundError
        If a file cannot be opened (any exception from ``xr.open_dataset`` is
        wrapped, with the original exception chained as ``__cause__``), or if
        no NetCDF files remain in the file list after filtering.

    """
    log_info("Starting to read RAPID dataset")

    if file_list is None:
        file_list = RAPID_DEFAULT_FILES
    # transport_only takes precedence over any explicitly supplied file_list.
    if transport_only:
        file_list = RAPID_TRANSPORT_FILES
    if isinstance(file_list, str):
        file_list = [file_list]

    local_data_dir = Path(data_dir) if data_dir else utilities.get_default_data_dir()
    local_data_dir.mkdir(parents=True, exist_ok=True)

    datasets = []
    for file in file_list:
        if not file.lower().endswith(".nc"):
            log_warning("Skipping non-NetCDF file: %s", file)
            continue

        # Only build a download URL when the source is a URL; the validity
        # check runs first, so ``rstrip`` is never called on a non-URL source.
        download_url = (
            f"{source.rstrip('/')}/{file}" if utilities._is_valid_url(source) else None
        )

        file_path = utilities.resolve_file_path(
            file_name=file,
            source=source,
            download_url=download_url,
            local_data_dir=local_data_dir,
            redownload=redownload,
        )

        log_info("Opening RAPID dataset: %s", file_path)
        try:
            ds = xr.open_dataset(file_path)
        except Exception as e:
            log_error("Failed to open NetCDF file: %s: %s", file_path, e)
            # The caller never receives the datasets opened so far, so release
            # their underlying file handles before propagating the failure.
            for opened in datasets:
                opened.close()
            # Keep FileNotFoundError for callers, but chain the real cause.
            raise FileNotFoundError(
                f"Failed to open NetCDF file: {file_path}: {e}"
            ) from e

        file_metadata = RAPID_FILE_METADATA.get(file, {})
        log_info("Attaching metadata to RAPID dataset from file: %s", file)
        utilities.safe_update_attrs(
            ds,
            {
                "source_file": file,
                "source_path": str(file_path),
                **RAPID_METADATA,
                **file_metadata,
            },
        )

        datasets.append(ds)

    if not datasets:
        log_error("No valid RAPID NetCDF files found in %s", file_list)
        raise FileNotFoundError(f"No valid RAPID NetCDF files found in {file_list}")

    log_info("Successfully loaded %d RAPID dataset(s)", len(datasets))
    return datasets