# Source code for amocatlas.data_sources.osnap55n

"""OSNAP array data reader for AMOCatlas.

This module provides functions to read and process data from the OSNAP
(Overturning in the Subpolar North Atlantic Program) observing array.

"""

from pathlib import Path
from typing import Union

import xarray as xr

from amocatlas import logger, utilities
from amocatlas.logger import log_error
from amocatlas.reader_utils import ReaderUtils

# Module-level alias for the logger object exposed by amocatlas.logger;
# read_osnap uses it for its progress message.
log = logger.log  # Use global logger

# Datasource identifier for automatic standardization. It is also the key
# handed to the ReaderUtils helpers (metadata loading, loading info, and
# metadata attachment) in read_osnap.
DATASOURCE_ID = "osnap55n"

# Legacy release, selected with version="2020": coverage 2014-08 to 2020-06
# (the filenames carry a 2023 release tag). The transport time series is
# declared first and then reused as the leading entry of the full file list.
OSNAP_TRANSPORT_FILES = [
    "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202006_2023.nc",
]
OSNAP_DEFAULT_FILES = [
    *OSNAP_TRANSPORT_FILES,
    "OSNAP_Streamfunction_201408_202006_2023.nc",
    "OSNAP_Gridded_TSV_201408_202006_2023.nc",
]

# 2025 release, selected with version="2025": extended coverage 2014-08 to
# 2022-07. Same layout as above: transport file first, then the other products.
OSNAP_2025_TRANSPORT_FILES = [
    "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202207_2025.nc",
]
OSNAP_2025_DEFAULT_FILES = [
    *OSNAP_2025_TRANSPORT_FILES,
    "OSNAP_Streamfunction_201408_202207_2025.nc",
    "OSNAP_Gridded_TSV_201408_202207_2025.nc",
]

# Filename -> download URL. Every file is served from the same repository
# endpoint and differs only in its bitstream identifier, so the table below
# lists (filename, bitstream id) pairs and the URL is assembled per entry.
OSNAP_FILE_URLS = {
    filename: f"https://repository.gatech.edu/bitstreams/{bitstream_id}/download"
    for filename, bitstream_id in (
        # Legacy 2020 files
        (
            "README_OSNAP-MOC_202306.doc",
            "930261ff-6cca-4cf9-81c8-d27c51a4ca49",
        ),
        (
            "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202006_2023.nc",
            "e039e311-dd2e-4511-a525-c2fcfb3be85a",
        ),
        (
            "OSNAP_Streamfunction_201408_202006_2023.nc",
            "5edf4cba-a28f-40a6-a4da-24d7436a42ab",
        ),
        (
            "OSNAP_Gridded_TSV_201408_202006_2023.nc",
            "598f200a-50ba-4af0-96af-bd29fe692cdc",
        ),
        # 2025 files - extended coverage 2014-2022
        (
            "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202207_2025.nc",
            "597db471-e2ea-4109-b1a1-b94451f1b884",
        ),
        (
            "OSNAP_Streamfunction_201408_202207_2025.nc",
            "f82339f6-2145-456d-9a53-f3ede32b76a3",
        ),
        (
            "OSNAP_Gridded_TSV_201408_202207_2025.nc",
            "af6a47f7-f705-49b4-a64f-5cd086b9b9fb",
        ),
    )
}

# Array-wide attributes shared by every OSNAP file; read_osnap passes this
# mapping to the metadata loader as the fallback set of global metadata.
OSNAP_METADATA = dict(
    project="Overturning in the Subpolar North Atlantic Program (OSNAP)",
    weblink="https://www.o-snap.org",
    comment=(
        "Dataset accessed and processed via "
        "http://github.com/AMOCcommunity/amocatlas"
    ),
    acknowledgement=(
        "OSNAP data were collected and made freely available by the OSNAP "
        "(Overturning in the Subpolar North Atlantic Program) project and "
        "all the national programs that contribute to it (www.o-snap.org)."
    ),
    doi="https://doi.org/10.35090/gatech/70342",
)

# Per-file attributes keyed by filename. Every product starts on 2014-08-01;
# each record below supplies the product description, the end of coverage and
# any release-specific extras, which are appended after the common keys.
OSNAP_FILE_METADATA = {
    filename: {
        "data_product": product,
        "time_coverage_start": "2014-08-01",
        "time_coverage_end": coverage_end,
        **extra_attrs,
    }
    for filename, product, coverage_end, extra_attrs in (
        # Legacy 2020 files
        (
            "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202006_2023.nc",
            "Time series of MOC, MHT, and MFT",
            "2020-06-30",
            {},
        ),
        (
            "OSNAP_Streamfunction_201408_202006_2023.nc",
            "Meridional overturning streamfunction",
            "2020-06-30",
            {},
        ),
        (
            "OSNAP_Gridded_TSV_201408_202006_2023.nc",
            "Gridded temperature, salinity, and velocity",
            "2020-06-30",
            {},
        ),
        # 2025 files - extended coverage
        (
            "OSNAP_MOC_MHT_MFT_TimeSeries_201408_202207_2025.nc",
            "Time series of MOC, MHT, and MFT (2014-2022)",
            "2022-07-31",
            {
                "dataset_version": "2025",
                "processing_software": "MATLAB R2024b",
            },
        ),
        (
            "OSNAP_Streamfunction_201408_202207_2025.nc",
            "Meridional overturning streamfunction (2014-2022)",
            "2022-07-31",
            {
                "dataset_version": "2025",
                "processing_software": "MATLAB R2024b",
            },
        ),
        (
            "OSNAP_Gridded_TSV_201408_202207_2025.nc",
            "Gridded velocity, temperature, and salinity (2014-2022)",
            "2022-07-31",
            {
                "dataset_version": "2025",
                "processing_software": "MATLAB R2024b",
                "file_size": "55.98 MB",
            },
        ),
    )
}


def read_osnap(
    source: Union[str, None] = None,
    file_list: Union[str, list[str], None] = None,
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
    version: str = "2025",
    track_added_attrs: bool = False,
) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list]]:
    """Load OSNAP datasets from a URL or local file path into xarray Datasets.

    Each requested file is resolved through ``utilities.resolve_file_path``,
    opened with ``ReaderUtils.safe_load_dataset`` and enriched through
    ``ReaderUtils.attach_metadata_with_tracking``.

    Parameters
    ----------
    source : str, optional
        Local path to the data directory (remote source is handled per-file).
    file_list : str or list of str, optional
        Filename or list of filenames to process. Defaults depend on version:
        OSNAP_2025_DEFAULT_FILES for "2025", OSNAP_DEFAULT_FILES for "2020".
        Ignored while ``transport_only`` is True (see below).
    transport_only : bool, optional
        If True (the default), restrict to transport files only. This takes
        precedence over ``file_list``: the transport file list of the selected
        version replaces whatever was passed. Pass ``transport_only=False`` to
        load an explicit ``file_list`` or the full default list.
    data_dir : str, Path or None, optional
        Optional local data directory.
    redownload : bool, optional
        If True, force redownload of the data.
    version : str, optional
        Dataset version to use ("2025" for 2014-2022 data, "2020" for
        2014-2020 data). Defaults to "2025" (latest version).
    track_added_attrs : bool, optional
        If True, track which attributes were added during metadata enrichment
        and return them alongside the datasets.

    Returns
    -------
    list of xr.Dataset
        When ``track_added_attrs`` is False: the loaded xarray datasets with
        basic inline and file-specific metadata.
    tuple of (list of xr.Dataset, list)
        When ``track_added_attrs`` is True: the datasets plus one entry of
        attribute-change information per loaded dataset, in the same order.

    Raises
    ------
    ValueError
        If an invalid version is specified.
    FileNotFoundError
        If no download URL is defined for a requested file. Resolving or
        loading a file may presumably raise it as well when the file cannot
        be downloaded and does not exist locally (raised by the helpers, not
        by this function).

    """
    log.info("Starting to read OSNAP dataset (version %s)", version)

    # Select appropriate file lists based on version. Done before any other
    # work so that an invalid version fails fast.
    if version == "2025":
        default_files = OSNAP_2025_DEFAULT_FILES
        transport_files = OSNAP_2025_TRANSPORT_FILES
    elif version == "2020":
        default_files = OSNAP_DEFAULT_FILES
        transport_files = OSNAP_TRANSPORT_FILES
    else:
        raise ValueError(f"Invalid version '{version}'. Must be '2020' or '2025'.")

    # Load YAML metadata with fallback
    global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
        DATASOURCE_ID, OSNAP_METADATA
    )

    # transport_only wins over any caller-supplied file_list; otherwise fall
    # back to the version default. Copies are taken so that downstream helpers
    # never hold a reference to the module-level constants.
    if transport_only:
        file_list = list(transport_files)
    elif file_list is None:
        file_list = list(default_files)
    if isinstance(file_list, str):
        file_list = [file_list]

    local_data_dir = ReaderUtils.setup_data_directory(data_dir)

    # Print information about files being loaded
    ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, OSNAP_FILE_METADATA)

    tracking = bool(track_added_attrs)
    datasets = []
    added_attrs_per_dataset = [] if tracking else None

    for file_name in ReaderUtils.filter_netcdf_files(file_list):
        download_url = OSNAP_FILE_URLS.get(file_name)
        if not download_url:
            log_error("No download URL defined for OSNAP file: %s", file_name)
            raise FileNotFoundError(
                f"No download URL defined for OSNAP file {file_name}"
            )

        file_path = utilities.resolve_file_path(
            file_name=file_name,
            source=source,
            download_url=download_url,
            local_data_dir=local_data_dir,
            redownload=redownload,
        )

        # Use ReaderUtils for consistent dataset loading
        ds = ReaderUtils.safe_load_dataset(file_path)

        # Attach metadata once; the helper returns (dataset, changes) when
        # tracking is requested and the bare dataset otherwise.
        attached = ReaderUtils.attach_metadata_with_tracking(
            ds,
            file_name,
            file_path,
            global_metadata,
            yaml_file_metadata,
            OSNAP_FILE_METADATA,
            DATASOURCE_ID,
            track_added_attrs=tracking,
        )
        if tracking:
            ds, attr_changes = attached
            added_attrs_per_dataset.append(attr_changes)
        else:
            ds = attached

        datasets.append(ds)

    # Use ReaderUtils for validation
    ReaderUtils.validate_datasets_loaded(datasets, file_list)

    if tracking:
        return datasets, added_attrs_per_dataset
    return datasets


def read_osnap_2025(
    source: str = None,
    file_list: Union[str, list[str]] = None,
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
) -> list[xr.Dataset]:
    """Load the OSNAP 2025 release (2014-2022 coverage).

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``read_osnap`` with ``version`` pinned to ``"2025"``.

    Parameters
    ----------
    source : str, optional
        Local path to the data directory (remote source is handled per-file).
    file_list : str or list of str, optional
        Filename or list of filenames to process. Defaults to
        OSNAP_2025_DEFAULT_FILES.
    transport_only : bool, optional
        If True, restrict to transport files only.
    data_dir : str, Path or None, optional
        Optional local data directory.
    redownload : bool, optional
        If True, force redownload of the data.

    Returns
    -------
    list of xr.Dataset
        List of loaded xarray datasets with basic inline and file-specific
        metadata.

    """
    forwarded = {
        "source": source,
        "file_list": file_list,
        "transport_only": transport_only,
        "data_dir": data_dir,
        "redownload": redownload,
    }
    return read_osnap(version="2025", **forwarded)