"""AMOCatlas data readers: unified interface for AMOC observing arrays.
This module provides the main interface for loading data from multiple
Atlantic Meridional Overturning Circulation (AMOC) observing arrays.
It serves as the orchestrator that routes requests to specific array
readers and provides both sample and full dataset loading capabilities.
The module supports data from:
- RAPID (26°N)
- MOVE (16°N)
- OSNAP (Subpolar North Atlantic)
- SAMBA (34.5°S)
- MOCHA, 41°N, DSO, and FW2015 arrays
- CALAFAT2025, ZHENG2024, 47°N, Faroe Bank Channel (FBC), and Arctic Gateway arrays
Main functions:
- load_dataset(): Load full datasets from any supported array
- load_sample_dataset(): Load small sample datasets for testing
"""
from pathlib import Path
from typing import Callable, List, Union
import warnings
import pandas as pd
import xarray as xr
from amocatlas import logger
from amocatlas.logger import log_info
from amocatlas.data_sources import (
read_move,
read_osnap,
read_osnap_2025,
read_rapid,
read_samba,
read_fw2015,
read_mocha,
read_41n,
read_dso,
read_calafat2025,
read_zheng2024,
read_47n,
read_fbc,
read_arcticgateway,
)
log = logger.log
# Dropbox location Public/linked_elsewhere/amocatlas_data/
server = "https://www.dropbox.com/scl/fo/4bjo8slq1krn5rkhbkyds/AM-EVfSHi8ro7u2y8WAcKyw?rlkey=16nqlykhgkwfyfeodkj274xpc&dl=0"
def _get_reader(array_name: str) -> Callable[..., List[xr.Dataset]]:
    """Resolve an observing-array name to its reader function.

    Parameters
    ----------
    array_name : str
        The name of the observing array (matched case-insensitively).

    Returns
    -------
    function
        Reader function corresponding to the given array name.

    Raises
    ------
    ValueError
        If an unknown array name is provided.
    """
    # Registry mapping lowercase array names to their reader callables.
    registry = {
        "move": read_move,
        "rapid": read_rapid,
        "osnap": read_osnap,
        "osnap_2025": read_osnap_2025,
        "samba": read_samba,
        "fw2015": read_fw2015,
        "mocha": read_mocha,
        "41n": read_41n,
        "dso": read_dso,
        "calafat2025": read_calafat2025,
        "zheng2024": read_zheng2024,
        "47n": read_47n,
        "fbc": read_fbc,
        "arcticgateway": read_arcticgateway,
    }
    key = array_name.lower()
    if key not in registry:
        # No underlying exception to chain here; message lists the valid choices.
        raise ValueError(
            f"Unknown array name: {array_name}. Valid options are: {list(registry.keys())}",
        )
    return registry[key]
def load_sample_dataset(array_name: str = "rapid") -> xr.Dataset:
    """Load a sample dataset for quick testing.

    .. deprecated::
        This function is deprecated and will be removed in a future version.
        Use the new intuitive API instead: :mod:`amocatlas.read` (e.g., ``amocatlas.read.rapid()``).

    Currently supports:
    - 'rapid' : loads the 'moc_transports.nc' file

    Parameters
    ----------
    array_name : str, optional
        The name of the observing array to load. Default is 'rapid'.

    Returns
    -------
    xr.Dataset
        A single xarray Dataset from the sample file.

    Raises
    ------
    ValueError
        If the array_name is not recognised.
    FileNotFoundError
        If no datasets could be loaded for the sample file.
    """
    warnings.warn(
        "load_sample_dataset() is deprecated and will be removed in a future version. "
        "Use the new intuitive API instead: amocatlas.read.rapid()",
        DeprecationWarning,
        stacklevel=2,
    )
    # Guard clause: only the RAPID array has a registered sample file.
    if array_name.lower() != "rapid":
        raise ValueError(
            f"Sample dataset for array '{array_name}' is not defined. "
            "Currently only 'rapid' is supported.",
        )
    sample_file = "moc_transports.nc"
    datasets = load_dataset(
        array_name=array_name,
        file_list=sample_file,
        transport_only=True,
    )
    if not datasets:
        raise FileNotFoundError(
            f"No datasets were loaded for sample file: {sample_file}",
        )
    return datasets[0]
def load_dataset(
    array_name: str,
    source: str | None = None,
    file_list: str | list[str] | None = None,
    transport_only: bool = True,
    data_dir: str | Path | None = None,
    redownload: bool = False,
) -> list[xr.Dataset]:
    """Load raw datasets from a selected AMOC observing array.

    .. deprecated::
        This function is deprecated and will be removed in a future version.
        Use the new intuitive API instead: :mod:`amocatlas.read` (e.g., ``amocatlas.read.rapid()``).

    Parameters
    ----------
    array_name : str
        The name of the observing array to load. Options are:
        - 'move' : MOVE 16N array
        - 'rapid' : RAPID 26N array
        - 'osnap' : OSNAP array (2014-2022, configurable version via main reader)
        - 'osnap_2025' : OSNAP array (2014-2022, dedicated 2025 reader function)
        - 'samba' : SAMBA 34S array
        - 'fw2015' : FW2015 array
        - '41n' : 41N array
        - 'dso' : DSO array
        - 'calafat2025' : CALAFAT2025 array
        - 'zheng2024' : ZHENG2024 array
        - '47n' : 47N array
        - 'fbc' : Faroe Bank Channel overflow array
        - 'arcticgateway' : ARCTIC Gateway array
    source : str, optional
        URL or local path to the data source.
        If None, the reader-specific default source will be used.
    file_list : str or list of str, optional
        Filename or list of filenames to process.
        If None, the reader-specific default files will be used.
    transport_only : bool, optional
        If True, restrict to transport files only.
    data_dir : str or pathlib.Path, optional
        Local directory for downloaded files.
    redownload : bool, optional
        If True, force redownload of the data.

    Returns
    -------
    list of xarray.Dataset
        List of datasets loaded from the specified array.

    Raises
    ------
    ValueError
        If an unknown array name is provided.
    """
    warnings.warn(
        "load_dataset() is deprecated and will be removed in a future version. "
        f"Use the new intuitive API instead: amocatlas.read.{array_name.lower()}()",
        DeprecationWarning,
        stacklevel=2,
    )
    if logger.LOGGING_ENABLED:
        logger.setup_logger(array_name=array_name)
    # Use logger globally
    log_info(f"Loading dataset for array: {array_name}")
    # _get_reader raises ValueError for unknown array names.
    reader = _get_reader(array_name)
    datasets = reader(
        source=source,
        file_list=file_list,
        transport_only=transport_only,
        data_dir=data_dir,
        redownload=redownload,
    )
    log_info(f"Successfully loaded {len(datasets)} dataset(s) for array: {array_name}")
    _summarise_datasets(datasets, array_name)
    return datasets
def _summarise_datasets(datasets: list, array_name: str) -> None:
    """Assemble a per-dataset summary, print it, and write it to the log."""
    lines = [
        f"Summary for array '{array_name}':",
        f"Total datasets loaded: {len(datasets)}\n",
    ]
    for num, ds in enumerate(datasets, start=1):
        lines.append(f"Dataset {num}:")
        # Filename recorded by the reader in the dataset attributes.
        lines.append(f" Source file: {ds.attrs.get('source_file', 'Unknown')}")
        # Time coverage, reported only when a TIME variable is present.
        time_var = ds.get("TIME")
        if time_var is not None:
            stamps = pd.to_datetime(time_var.values)
            stamps = stamps[~pd.isna(stamps)]
            if len(stamps) > 0:
                first = stamps[0].strftime("%Y-%m-%d")
                last = stamps[-1].strftime("%Y-%m-%d")
                lines.append(f" Time coverage: {first} to {last}")
            else:
                lines.append(" Time coverage: no valid time values found")
        lines.append(" Dimensions:")
        lines.extend(f" - {dim}: {size}" for dim, size in ds.sizes.items())
        lines.append(" Variables:")
        lines.extend(f" - {name}: shape {ds[name].shape}" for name in ds.data_vars)
        lines.append("")  # blank separator between datasets
    summary = "\n".join(lines)
    # Emit to console and to the module log.
    print(summary)
    log_info("\n" + summary)