"""MOVE array data reader for AMOCatlas.
This module provides functions to read and process data from the MOVE
(Meridional Overturning Variability Experiment) observing array located
at 16°N in the Atlantic.
"""
from pathlib import Path
from typing import Union
import xarray as xr
import numpy as np
import pandas as pd
from amocatlas import logger, utilities
from amocatlas.utilities import apply_defaults
from amocatlas.reader_utils import ReaderUtils
log = logger.log # ✅ use the global logger
# Datasource identifier for automatic standardization
DATASOURCE_ID = "move16n"
# Default source and file list
MOVE_DEFAULT_SOURCE = (
"https://dods.ndbc.noaa.gov/thredds/fileServer/oceansites/DATA_GRIDDED/MOVE/"
)
MOVE_DEFAULT_FILES = [
"OS_MOVE_20000206-20221014_DPR_VOLUMETRANSPORT.nc",
"OS_MOVE_20000101-20221021_GRD_CURRENTS-AT-SITES-MOVE3-MOVE4.nc",
"OS_MOVE_20000101-20221018_GRD_TEMPERATURE-SALINITY-AT-SITES-MOVE1-MOVE3.nc",
]
MOVE_TRANSPORT_FILES = ["OS_MOVE_20000206-20221014_DPR_VOLUMETRANSPORT.nc"]
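# Subset used when transport_only=True (the default): only the volume-transport product is loaded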
# Global metadata for MOVE
MOVE_METADATA = {
"description": "MOVE transport estimates dataset from UCSD mooring project",
"project": "Meridional Overturning Variability Experiment (MOVE)",
"weblink": "https://dods.ndbc.noaa.gov/thredds/fileServer/oceansites/DATA_GRIDDED/MOVE/",
"comment": "Dataset accessed and processed via http://github.com/AMOCcommunity/amocatlas",
# DOI can be added here when available
"acknowledgement": "The MOVE project is made possible with funding from the NOAA Climate Program Office. Initial funding came from the German Bundesministerium fuer Bildung und Forschung.",
}
# File-specific metadata placeholder
MOVE_FILE_METADATA = {
"OS_MOVE_20000206-20221014_DPR_VOLUMETRANSPORT.nc": {
"data_product": "MOVE transport time series",
# Add specific acknowledgments here if needed in future
},
}
@apply_defaults(MOVE_DEFAULT_SOURCE, MOVE_DEFAULT_FILES)
def read_move(
source: str,
file_list: str | list[str],
transport_only: bool = True,
data_dir: Union[str, Path, None] = None,
redownload: bool = False,
track_added_attrs: bool = False,
) -> list[xr.Dataset]:
"""Load the MOVE transport dataset from a URL or local file path into xarray Datasets.
Parameters
----------
source : str, optional
URL or local path to the NetCDF file(s).
Defaults to the MOVE data repository URL.
file_list : str or list of str, optional
Filename or list of filenames to process.
Defaults to MOVE_DEFAULT_FILES.
transport_only : bool, optional
If True, restrict to transport files only.
data_dir : str, Path or None, optional
Optional local data directory.
redownload : bool, optional
If True, force redownload of the data.
track_added_attrs : bool, optional
If True, track which attributes were added by AMOCatlas processing.
Returns tuple (datasets, added_attrs_per_dataset) when enabled.
Returns
-------
list of xr.Dataset or tuple
If track_added_attrs=False: List of loaded xarray datasets.
If track_added_attrs=True: Tuple of (datasets, added_attrs_per_dataset)
where added_attrs_per_dataset is a list of dictionaries containing
'added' and 'modified' attribute tracking information.
Raises
------
ValueError
If the source is neither a valid URL nor a directory path.
FileNotFoundError
If the file cannot be downloaded or does not exist locally.
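
    Examples
    --------
    Illustrative usage sketch (assumes network access to the default MOVE
    source; ``~/amocatlas_data`` is a hypothetical cache directory):

    >>> datasets = read_move(transport_only=True, data_dir="~/amocatlas_data")  # doctest: +SKIP
    >>> ds_transport = datasets[0]  # doctest: +SKIP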
"""
log.info("Starting to read MOVE dataset")
# Load YAML metadata with fallback
global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
DATASOURCE_ID, MOVE_METADATA
)
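    # transport_only=True (the default) overrides any caller-supplied file_list with the transport product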
if transport_only:
file_list = MOVE_TRANSPORT_FILES
if isinstance(file_list, str):
file_list = [file_list]
local_data_dir = ReaderUtils.setup_data_directory(data_dir)
# Print information about files being loaded
ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, MOVE_FILE_METADATA)
datasets = []
added_attrs_per_dataset = [] if track_added_attrs else None
netcdf_files = ReaderUtils.filter_netcdf_files(file_list)
for file in netcdf_files:
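        # Build a download URL only when the source is a remote URL; stays None for local paths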
download_url = (
f"{source.rstrip('/')}/{file}" if utilities.is_valid_url(source) else None
)
try:
file_path = utilities.resolve_file_path(
file_name=file,
source=source,
download_url=download_url,
local_data_dir=local_data_dir,
redownload=redownload,
)
except FileNotFoundError as e:
log.warning(f"Skipping {file}: {e}")
continue
        # Load with ReaderUtils; decode_times=False so the MOVE TIME variable can be cleaned and decoded manually below
ds = ReaderUtils.safe_load_dataset(file_path, decode_times=False)
# Clean up time variable
if "TIME" in ds.variables:
time_raw = ds["TIME"].values
valid = (time_raw > 0) & (time_raw < 30000)
n_invalid = (~valid).sum()
if n_invalid > 0:
log.info(
f"Found {n_invalid} invalid time values in {file_path}; replacing with NaN."
)
clean_time = xr.where(valid, time_raw, np.nan)
base = np.datetime64("1950-01-01")
time_converted = base + clean_time * np.timedelta64(1, "D")
# Replace the time in the dataset
ds["TIME"] = ("TIME", time_converted)
ds["TIME"].attrs.update(
{
"units": "days since 1950-01-01",
}
)
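            # TIME now holds datetime64 values; invalid entries become NaT and are dropped below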
log.debug(f"Converted time using base 1950-01-01 for {file_path}")
else:
log.warning(f"No TIME variable found in {file_path}")
        # Filter out NaT time values and the corresponding dataset entries
        if "TIME" in ds.variables:
            time_pd = pd.to_datetime(ds["TIME"].values)
            valid_time_mask = ~pd.isna(time_pd)
            if (~valid_time_mask).any():
                n_removed = (~valid_time_mask).sum()
                log.info(
                    f"Removing {n_removed} entries with invalid NaT time values from {file_path}"
                )
                ds = ds.isel(TIME=valid_time_mask)
# Attach metadata with optional tracking
if track_added_attrs:
ds, attr_changes = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
MOVE_FILE_METADATA,
DATASOURCE_ID,
track_added_attrs=True,
)
added_attrs_per_dataset.append(attr_changes)
else:
ds = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
MOVE_FILE_METADATA,
DATASOURCE_ID,
track_added_attrs=False,
)
datasets.append(ds)
# Use ReaderUtils for validation
ReaderUtils.validate_datasets_loaded(datasets, file_list)
# Handle track_added_attrs parameter
if track_added_attrs:
return datasets, added_attrs_per_dataset
else:
return datasets