"""RAPID array data reader for AMOCatlas.
This module provides functions to read and process data from the RAPID
(Rapid Climate Change) observing array located at 26°N in the Atlantic.
"""
from pathlib import Path
from typing import Union

import xarray as xr

from amocatlas import logger, utilities
from amocatlas.logger import log_info
from amocatlas.utilities import apply_defaults
from amocatlas.reader_utils import ReaderUtils

log = logger.log  # Use the global logger

# Datasource identifier for automatic standardization
DATASOURCE_ID = "rapid26n"

# Default source URL and file lists for RAPID data
RAPID_DEFAULT_SOURCE = "https://rapid.ac.uk/sites/default/files/rapid_data/"
RAPID_TRANSPORT_FILES = ["moc_transports.nc"]
RAPID_DEFAULT_FILES = [
"moc_transports.nc",
"moc_vertical.nc",
"ts_gridded.nc",
"2d_gridded.nc",
"meridional_transports.nc",
]

# Inline fallback metadata, used when YAML metadata is unavailable
RAPID_METADATA = {
"description": "RAPID 26N transport estimates dataset",
"project": "RAPID-AMOC 26°N array",
"web_link": "https://rapid.ac.uk/rapidmoc",
"note": "Dataset accessed and processed via xarray",
}

# File-specific fallback metadata, keyed by filename
RAPID_FILE_METADATA = {
"moc_transports.nc": {
"data_product": "Layer transports - individual water mass transport components (thermocline, intermediate water, NADW, AABW, Ekman, Florida Straits)",
},
"moc_vertical.nc": {
"data_product": "Vertical streamfunction - overturning circulation streamfunction as function of depth and time",
},
"ts_gridded.nc": {
"data_product": "Gridded temperature and salinity - T/S profiles from moorings across the basin",
},
"2d_gridded.nc": {
"data_product": "Monthly velocity and hydrography fields - Conservative Temperature (CT), Absolute Salinity (SA), and velocities on regular grid",
},
"meridional_transports.nc": {
"data_product": "Heat and freshwater transports - AMOC strength, heat transport, freshwater transport, and overturning streamfunctions in density space",
},
}

# Individual files are served from the pattern:
# https://rapid.ac.uk/sites/default/files/rapid_data/<filename>


@apply_defaults(RAPID_DEFAULT_SOURCE, RAPID_DEFAULT_FILES)
def read_rapid(
source: Union[str, Path, None],
file_list: Union[str, list[str]],
transport_only: bool = True,
data_dir: Union[str, Path, None] = None,
redownload: bool = False,
track_added_attrs: bool = False,
) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list[list[str]]]]:
"""Load the RAPID transport dataset from a URL or local file path into an xarray.Dataset.
Parameters
----------
source : str, optional
URL or local path to the NetCDF file(s).
Defaults to the RAPID data repository URL.
file_list : str or list of str, optional
Filename or list of filenames to process.
If None, will attempt to list files in the source directory.
transport_only : bool, optional
If True, restrict to transport files only.
data_dir : str, Path or None, optional
Optional local data directory.
redownload : bool, optional
If True, force redownload of the data.
track_added_attrs : bool, optional
If True, return tuple of (datasets, list_of_metadata_changes_per_dataset).
If False, return only datasets. Default is False.
Returns
-------
list[xr.Dataset] or tuple[list[xr.Dataset], list[dict]]
If track_added_attrs=False: List of loaded datasets with metadata.
If track_added_attrs=True: Tuple of (datasets, list of metadata changes per dataset).
Raises
------
ValueError
If the source is neither a valid URL nor a directory path.
FileNotFoundError
If no valid NetCDF files are found in the provided file list.
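
    Examples
    --------
    A minimal usage sketch; passing ``None`` lets the ``apply_defaults``
    decorator fill in the default source and file list (requires network
    access to rapid.ac.uk, or previously downloaded files):

    >>> datasets = read_rapid(None, None, transport_only=True)  # doctest: +SKIP
    >>> datasets, changes = read_rapid(
    ...     None, None, transport_only=False, track_added_attrs=True
    ... )  # doctest: +SKIP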
"""
log_info("Starting to read RAPID dataset")
# Load YAML metadata with fallback
global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
DATASOURCE_ID, RAPID_METADATA
)
# Use ReaderUtils for common operations
file_list = ReaderUtils.prepare_file_list(
file_list, RAPID_DEFAULT_FILES, RAPID_TRANSPORT_FILES, transport_only
)
local_data_dir = ReaderUtils.setup_data_directory(data_dir)
# Print information about files being loaded - use YAML metadata if available
netcdf_files = ReaderUtils.filter_netcdf_files(file_list)
display_file_metadata = (
yaml_file_metadata if yaml_file_metadata else RAPID_FILE_METADATA
)
ReaderUtils.print_loading_info(netcdf_files, DATASOURCE_ID, display_file_metadata)
datasets = []
added_attrs_per_dataset = []
for file in netcdf_files:
# RAPID-specific URL construction
download_url = (
f"{source.rstrip('/')}/{file}" if utilities.is_valid_url(source) else None
)
file_path = utilities.resolve_file_path(
file_name=file,
source=source,
download_url=download_url,
local_data_dir=local_data_dir,
redownload=redownload,
)
# Use ReaderUtils for consistent dataset loading and metadata
ds = ReaderUtils.safe_load_dataset(file_path)
        # Get file-specific metadata from YAML, falling back to the hardcoded table
        file_metadata = yaml_file_metadata.get(file, RAPID_FILE_METADATA.get(file, {}))
# Apply variable mapping and coordinate metadata from YAML
if file in yaml_file_metadata and yaml_file_metadata[file]:
yaml_file_data = yaml_file_metadata[file]
# Variable mapping will be handled in standardization stage (Option A approach)
# Store mapping for later use but don't apply renaming here
var_mapping = yaml_file_data.get("variable_mapping", {})
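            # Illustrative shape only (assumed, not taken from the YAML):
            # var_mapping = {"original_name": "standard_name"}  # original -> standardized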
# Apply coordinate metadata from YAML
# Since we're not renaming in reader, use original coordinate names
coord_metadata = yaml_file_data.get("coordinates", {})
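            # Illustrative shape only (assumed): {"time": {"standard_name": "time"}}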
for coord_name, coord_attrs in coord_metadata.items():
if coord_name in ds.coords:
ds[coord_name].attrs.update(coord_attrs)
# Apply variable metadata from YAML using original variable names
# (standardized names will get metadata applied during standardization)
var_metadata = yaml_file_data.get("variables", {})
            for std_var_name, var_attrs in var_metadata.items():
                # Find the original variable name that maps to this standardized name
                orig_var_name = next(
                    (orig for orig, std in var_mapping.items() if std == std_var_name),
                    None,
                )
# Apply metadata to original variable name if it exists in dataset
if orig_var_name and orig_var_name in ds.data_vars:
ds[orig_var_name].attrs.update(var_attrs)
if track_added_attrs:
ds, attr_changes = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
file_metadata,
DATASOURCE_ID,
track_added_attrs=True,
)
added_attrs_per_dataset.append(attr_changes)
else:
ds = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
file_metadata,
DATASOURCE_ID,
track_added_attrs=False,
)
datasets.append(ds)
# Use ReaderUtils for validation
ReaderUtils.validate_datasets_loaded(datasets, file_list)
if track_added_attrs:
return datasets, added_attrs_per_dataset
else:
return datasets
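

if __name__ == "__main__":
    # Quick manual check (illustrative only, not part of the AMOCatlas API):
    # load the transport file and print each dataset's variable names.
    # Requires network access to rapid.ac.uk or cached copies on disk.
    rapid_datasets = read_rapid(
        source=None,  # apply_defaults substitutes RAPID_DEFAULT_SOURCE
        file_list=None,  # apply_defaults substitutes RAPID_DEFAULT_FILES
        transport_only=True,
    )
    for rapid_ds in rapid_datasets:
        # Data variable names come straight from the NetCDF file
        print(sorted(rapid_ds.data_vars))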