"""Frajka-Williams 2015 dataset reader for AMOCatlas.
This module provides functions to read and process data from the
Frajka-Williams et al. (2015) AMOC proxy dataset. This dataset provides
a reconstruction of AMOC variability based on sea surface height and wind
stress observations, extending the observational record beyond direct
mooring observations.
"""
from pathlib import Path
from typing import Union
import xarray as xr
import scipy.io
import pandas as pd
import numpy as np
from amocatlas import logger, utilities
from amocatlas.logger import log_error, log_info, log_warning
from amocatlas.utilities import apply_defaults
from amocatlas.reader_utils import ReaderUtils
log = logger.log # Use global logger
# Datasource identifier for automatic standardization
DATASOURCE_ID = "fw2015"
# Default file list
FW2015_DEFAULT_FILES = [
"MOCproxy_for_figshare_v1.mat",
]
FW2015_TRANSPORT_FILES = ["MOCproxy_for_figshare_v1.mat"]
# Mapping of filenames to download URLs
FW2015_FILE_URLS = {
"README.txt": "https://figshare.com/ndownloader/files/3369791?private_link=281b3e9c8abba860d553",
"MOCproxy_for_figshare_v1.mat": "https://figshare.com/ndownloader/files/3369779",
}
# General Metadata (global for FW2015)
FW2015_METADATA = {
"project": "Estimating the Atlantic overturning at 26°N using satellite altimetry and cable measurements",
"doi": "http://dx.doi.org/10.1002/2015GL063220",
}
# File-specific metadata (placeholder, ready to extend)
FW2015_FILE_METADATA = {
"MOCproxy_for_figshare_v1.mat": {
"data_product": "Time series of MOC",
},
}
def _parse_fw2015_mat(file_path: Union[str, Path]) -> xr.Dataset:
    """Parse a FW2015 ``.mat`` proxy file into an xarray Dataset.

    Extracts the ``recon`` and ``mocgrid`` structs, builds a time-indexed
    Dataset using the original MATLAB field names (renaming happens later in
    standardization), and copies the provenance attributes from ``recon``.

    Raises whatever ``scipy.io.loadmat`` or attribute access raises
    (OSError, ValueError, KeyError, AttributeError); the caller wraps these.
    """
    mat_data = scipy.io.loadmat(
        file_path, squeeze_me=True, struct_as_record=False
    )
    recon = mat_data.get("recon")
    mocgrid = mat_data.get("mocgrid")
    time = recon.time

    # Use original MATLAB field names (renaming will happen in standardization)
    # Note: time is used as coordinate, not as a data variable
    variables = {
        "mocproxy": recon.mocproxy,
        "ek": recon.ek,
        "h1umo": recon.h1umo,  # Original name, will be renamed to SSHA in standardization
        "gs": recon.gs,
        "umoproxy": recon.umoproxy,
        "moc": mocgrid.moc,  # Grid variables use original names too
        "ek_grid": mocgrid.ek,  # Use lowercase with underscore for consistency
        "gs_grid": mocgrid.gs,
        "lnadw": mocgrid.lnadw,
        "umo": mocgrid.umo,
        "unadw": mocgrid.unadw,
    }

    # Convert time to datetime. 719529 is the MATLAB datenum for 1970-01-01,
    # so this treats `time` as MATLAB serial date numbers (days since
    # 0000-01-00), not decimal years as an earlier comment claimed.
    # NOTE(review): astype("int") truncates fractional days, so sub-daily
    # precision is dropped — confirm this is intended.
    time = np.asarray(time)
    time = pd.to_datetime(
        (time - 719529).astype("int"), origin="unix", unit="D"
    )

    ds = xr.Dataset(
        {
            name: ("time", np.asarray(values))
            for name, values in variables.items()
        },
        coords={"time": time},
    )

    # Provenance attributes carried over from the MATLAB struct.
    ds.attrs["created"] = recon.created
    ds.attrs["url"] = recon.url
    ds.attrs["paper"] = recon.paper
    ds.attrs["version"] = recon.version
    return ds


@apply_defaults(None, FW2015_DEFAULT_FILES)
def read_fw2015(
    source: Union[str, Path, None],
    file_list: Union[str, list[str]],
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
    track_added_attrs: bool = False,
) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list]]:
    """Load the FW2015 transport datasets from a URL or local file path into xarray Datasets.

    Parameters
    ----------
    source : str, optional
        Local path to the data directory (remote source is handled per-file).
    file_list : str or list of str, optional
        Filename or list of filenames to process.
        Defaults to FW2015_DEFAULT_FILES.
    transport_only : bool, optional
        If True, restrict to transport files only.
    data_dir : str, Path or None, optional
        Optional local data directory.
    redownload : bool, optional
        If True, force redownload of the data.
    track_added_attrs : bool, optional
        If True, track which attributes were added during metadata enrichment.

    Returns
    -------
    list of xr.Dataset
        List of loaded xarray datasets with basic inline and file-specific
        metadata. If ``track_added_attrs`` is True, returns a tuple of
        (datasets, per-dataset attribute-change records) instead.

    Raises
    ------
    FileNotFoundError
        If no download URL mapping exists for a requested file, if the file
        cannot be downloaded or does not exist locally, or if no valid data
        files are found at all.
    ValueError
        If a ``.mat`` file exists but cannot be parsed.
    """
    log_info("Starting to read FW2015 dataset")

    # Load YAML metadata, falling back to the inline FW2015_METADATA defaults.
    global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
        DATASOURCE_ID, FW2015_METADATA
    )

    # Resolve the file selection; transport_only overrides the caller's list.
    if file_list is None:
        file_list = FW2015_DEFAULT_FILES
    if transport_only:
        file_list = FW2015_TRANSPORT_FILES
    if isinstance(file_list, str):
        file_list = [file_list]

    # Determine the local storage path
    local_data_dir = Path(data_dir) if data_dir else utilities.get_default_data_dir()
    local_data_dir.mkdir(parents=True, exist_ok=True)

    # Print information about files being loaded
    ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, FW2015_FILE_METADATA)

    datasets = []
    added_attrs_per_dataset = [] if track_added_attrs else None
    for file in file_list:
        # Only .txt and .mat files are known FW2015 artifacts.
        if not file.lower().endswith((".txt", ".mat")):
            log_warning("Skipping unsupported file type: %s", file)
            continue
        download_url = FW2015_FILE_URLS.get(file)
        if not download_url:
            log_error("No download URL defined for FW2015 file: %s", file)
            raise FileNotFoundError(f"No download URL defined for FW2015 file {file}")
        file_path = utilities.resolve_file_path(
            file_name=file,
            source=source,
            download_url=download_url,
            local_data_dir=local_data_dir,
            redownload=redownload,
        )

        # Parse the .mat file; any low-level failure is re-raised as
        # ValueError with the offending path for easier diagnosis.
        try:
            log.info("Opening fw2015 file: %s", file_path)
            ds = _parse_fw2015_mat(file_path)
        except (OSError, IOError, ValueError, KeyError, AttributeError) as e:
            log.exception("Failed to parse .mat file: %s", file_path)
            raise ValueError(f"Failed to parse .mat file: {file_path}: {e}") from e

        # Attach metadata, optionally recording which attributes were added.
        if track_added_attrs:
            ds, attr_changes = ReaderUtils.attach_metadata_with_tracking(
                ds,
                file,
                file_path,
                global_metadata,
                yaml_file_metadata,
                FW2015_FILE_METADATA,
                DATASOURCE_ID,
                track_added_attrs=True,
            )
            added_attrs_per_dataset.append(attr_changes)
        else:
            ds = ReaderUtils.attach_metadata_with_tracking(
                ds,
                file,
                file_path,
                global_metadata,
                yaml_file_metadata,
                FW2015_FILE_METADATA,
                DATASOURCE_ID,
                track_added_attrs=False,
            )
        datasets.append(ds)

    if not datasets:
        log.error("No valid FW2015 files found in %s", file_list)
        raise FileNotFoundError(f"No valid data files found in {file_list}")
    log.info("Successfully loaded %d FW2015 dataset(s)", len(datasets))
    if track_added_attrs:
        return datasets, added_attrs_per_dataset
    return datasets