"""Sanchez-Franks Satellite proxy for the AMOC at 26N data reader for AMOCatlas.
This module provides functions to read and process satellite proxy transport data for 26N
from Sanchez-Franks et al. (2021). This dataset provides a satellite reconstruction of the AMOC transport
at 26N based on satellite altimetry. It also includes the upper-mid-ocean and gulf stream components.
The components are derived through a dynamically based method.
"""
from pathlib import Path
from typing import Union
import numpy as np
import xarray as xr
# Import the modules used
from amocatlas import logger, utilities
from amocatlas.logger import log_error, log_info, log_warning, log_debug
from amocatlas.utilities import apply_defaults
from amocatlas.reader_utils import ReaderUtils
log = logger.log # Use the global logger
# Identifier of this data source (used for automatic standardization).
DATASOURCE_ID = "sf2021"

# The single NetCDF file and product title that the SF2021 constants refer to.
_SF2021_TRANSPORT_FILENAME = "altimetry_moc_transport_1993_2020_18mos_smoothed.nc"
_SF2021_TITLE = "A satellite reconstruction of the AMOC transport at 26N"

# Default and transport-only file lists (kept as two independent list objects).
SF2021_DEFAULT_FILES = [_SF2021_TRANSPORT_FILENAME]
SF2021_TRANSPORT_FILES = [_SF2021_TRANSPORT_FILENAME]

# Base URL the default files are fetched from.
SF2021_DEFAULT_SOURCE = "https://zenodo.org/records/18941523/files/"

# Dataset-level metadata.
SF2021_METADATA = dict(
    project=_SF2021_TITLE,
    weblink="https://zenodo.org/records/18941523",
    comment="Dataset accessed and processed via http://github.com/AMOCcommunity/amocatlas",
)

# Per-file metadata, keyed by filename.
SF2021_FILE_METADATA = {
    _SF2021_TRANSPORT_FILENAME: {"data_product": _SF2021_TITLE},
}

# Attributes for the time coordinate.
_TIME_METADATA = dict(
    units="seconds since 1970-01-01T00:00:00Z",
    long_name="Time elapsed since 1970-01-01T00:00:00Z",
    standard_name="time",
    calendar="gregorian",
    vocabulary="http://vocab.nerc.ac.uk/collection/OG1/current/TIME/",
)
def _normalize_sf2021_time_coordinate(
    ds: xr.Dataset, source_file: Union[str, None] = None
) -> xr.Dataset:
    """Convert the SF2021 time coordinate from fractional days to datetime64[ns].

    The coordinate is interpreted as days since 0000-01-01 (proleptic
    Gregorian calendar). Non-finite entries (NaN/inf) are converted to NaT.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset with a numeric ``sat_time`` or ``TIME`` coordinate
        (``sat_time`` is checked first).
    source_file : str, optional
        Source filename (currently unused, kept for API compatibility).

    Returns
    -------
    xr.Dataset
        Dataset whose time coordinate is datetime64[ns] and carries a copy of
        ``_TIME_METADATA`` as attributes. The input is returned unchanged when
        no float/int time coordinate is found or when the conversion fails
        (a warning is logged in the latter case).

    """
    # Find time variable (check raw name first, then final name)
    time_var = next((var for var in ("sat_time", "TIME") if var in ds.coords), None)
    if time_var is None or ds[time_var].dtype.kind not in ("f", "i"):
        log_debug(
            f"Skipping TIME normalization - {time_var or 'TIME'} not found or not numeric"
        )
        return ds

    try:
        time_values = np.asarray(ds[time_var].values, dtype=np.float64)

        # Days between 0000-01-01 and 1970-01-01 in the proleptic Gregorian calendar.
        # NOTE(review): MATLAB datenum counts 0000-01-01 as day 1, i.e.
        # datenum(1970,1,1) == 719529. If the file stores datenum values this
        # offset is one day short - confirm against the dataset's time attributes.
        epoch_days = 719528.0

        # NaN/inf cannot be meaningfully cast to int64 (the result is
        # platform-dependent), so non-finite entries are neutralised here and
        # mapped to NaT explicitly after the conversion.
        finite = np.isfinite(time_values)
        relative_days = np.where(finite, time_values - epoch_days, 0.0)

        # Split into integer days and fractional nanoseconds relative to
        # 1970-01-01 so that year 0 never has to be represented at ns resolution.
        whole_days = np.floor(relative_days).astype(np.int64)
        fractional_ns = np.rint((relative_days - whole_days) * 86400 * 1e9).astype(
            np.int64
        )
        time_datetime = (
            np.datetime64("1970-01-01", "ns")
            + whole_days.astype("timedelta64[D]")
            + fractional_ns.astype("timedelta64[ns]")
        ).astype("datetime64[ns]")
        time_datetime = np.where(finite, time_datetime, np.datetime64("NaT", "ns"))

        # Use assign_coords to properly set dimension coordinate
        ds = ds.assign_coords({time_var: time_datetime})
        # Assign a copy so the module-level metadata dict is never shared.
        ds[time_var].attrs = dict(_TIME_METADATA)
        log_debug(f"Converted SF2021 {time_var} from days to datetime64[ns]")
    except (ValueError, TypeError, OverflowError) as e:
        log_warning(f"Failed to convert SF2021 TIME coordinate: {e}")

    return ds
@apply_defaults(SF2021_DEFAULT_SOURCE, SF2021_DEFAULT_FILES)
def read_sf2021(
    source: Union[str, Path, None],
    file_list: Union[str, list[str]],
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
    track_added_attrs: bool = False,
) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list]]:
    """Load the SF2021 transport datasets from a URL or local file path into xarray Datasets.

    Parameters
    ----------
    source : str, Path or None
        Base URL or local directory holding the files. When it is a valid
        URL, each file's download URL is built as ``<source>/<file>``.
    file_list : str or list of str
        Filename or list of filenames to process. ``None`` falls back to
        ``SF2021_DEFAULT_FILES``.
    transport_only : bool, optional
        If True (default), ``file_list`` is replaced by
        ``SF2021_TRANSPORT_FILES`` regardless of the value passed in.
    data_dir : str, Path or None, optional
        Local data directory; defaults to ``utilities.get_default_data_dir()``.
        The directory is created if it does not exist.
    redownload : bool, optional
        Forwarded to ``utilities.resolve_file_path`` to force a re-download.
    track_added_attrs : bool, optional
        If True, also return the attribute changes recorded while attaching
        metadata to each dataset.

    Returns
    -------
    list of xr.Dataset
        Loaded datasets with metadata attached and the time coordinate
        normalized, when ``track_added_attrs`` is False.
    tuple of (list of xr.Dataset, list)
        The datasets plus one attribute-change record per dataset, when
        ``track_added_attrs`` is True.

    Raises
    ------
    FileNotFoundError
        If no file in ``file_list`` has a ``.nc`` extension (such files are
        skipped with a warning).

    Notes
    -----
    Errors raised by the path-resolution and loading helpers propagate
    unchanged; their exact types are not visible from this function.

    """
    log.info("Starting to read SF2021 dataset")

    # Load YAML metadata with fallback
    global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
        DATASOURCE_ID, SF2021_METADATA
    )

    # Ensure file_list has a default
    if file_list is None:
        file_list = SF2021_DEFAULT_FILES
    if transport_only:
        file_list = SF2021_TRANSPORT_FILES
    if isinstance(file_list, str):
        file_list = [file_list]

    # Determine the local storage path
    local_data_dir = Path(data_dir) if data_dir else utilities.get_default_data_dir()
    local_data_dir.mkdir(parents=True, exist_ok=True)

    # Print information about files being loaded
    ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, SF2021_FILE_METADATA)

    datasets = []
    added_attrs_per_dataset = [] if track_added_attrs else None

    for file in file_list:
        # Only NetCDF files are supported; anything else is skipped, not fatal.
        if not file.lower().endswith(".nc"):
            log_warning("Skipping unsupported file type : %s", file)
            continue

        download_url = (
            f"{source.rstrip('/')}/{file}" if utilities.is_valid_url(source) else None
        )
        file_path = utilities.resolve_file_path(
            file_name=file,
            source=source,
            download_url=download_url,
            local_data_dir=local_data_dir,
            redownload=redownload,
        )

        # Use ReaderUtils for consistent dataset loading
        ds = ReaderUtils.safe_load_dataset(file_path)

        # Attach metadata; with tracking enabled the helper returns
        # (dataset, attr_changes), otherwise just the dataset.
        attached = ReaderUtils.attach_metadata_with_tracking(
            ds,
            file,
            file_path,
            global_metadata,
            yaml_file_metadata,
            SF2021_FILE_METADATA,
            DATASOURCE_ID,
            track_added_attrs=bool(track_added_attrs),
        )
        if track_added_attrs:
            ds, attr_changes = attached
            added_attrs_per_dataset.append(attr_changes)
        else:
            ds = attached

        # Normalize SF2021 TIME coordinate AFTER metadata attachment
        ds = _normalize_sf2021_time_coordinate(ds, source_file=file)
        datasets.append(ds)

    if not datasets:
        log_error("No valid SF2021 files in %s", file_list)
        raise FileNotFoundError(f"No valid data files found in {file_list}")

    log_info("Successfully loaded %d SF2021 dataset(s)", len(datasets))

    if track_added_attrs:
        return datasets, added_attrs_per_dataset
    return datasets