"""Faroe Bank Channel (FBC) overflow data reader for AMOCatlas.
This module provides functions to read and process data from the Faroe Bank
Channel overflow monitoring system. The FBC is a critical pathway for dense
water overflow from the Nordic Seas into the North Atlantic, representing
an important component of the Atlantic Meridional Overturning Circulation.
The dataset includes overflow transport estimates and hydrographic measurements
from moored instruments in the channel.
Key functions:
- read_fbc(): Main data loading interface for Faroe Bank Channel overflow data
Data source: Faroe Bank Channel overflow monitoring program
Location: Deep channel between Faroe Islands and Faroe Bank
"""
import datetime
from pathlib import Path
from typing import Union

import pandas as pd
import xarray as xr
# AMOCatlas logging and shared reader utilities
from amocatlas import logger, utilities
from amocatlas.logger import log_error, log_info, log_warning
from amocatlas.utilities import apply_defaults
from amocatlas.reader_utils import ReaderUtils
log = logger.log # Use the global logger
# Datasource identifier for automatic standardization
DATASOURCE_ID = "fbc"
# Default list of FBC data files
FBC_DEFAULT_FILES = [
"FBC_overflow_transport.txt",
]
FBC_TRANSPORT_FILES = ["FBC_overflow_transport.txt"]
FBC_DEFAULT_SOURCE = "https://envofar.fo/var/ftp/Timeseries/"
FBC_METADATA = {
"project": "Faroe Bank Channel overflow 1995-2015",
"weblink": "https://envofar.fo/var/ftp/Timeseries/FBC_overflow_transport.txt",
"comment": "Dataset accessed and processed via http://github.com/AMOCcommunity/amocatlas",
}
FBC_FILE_METADATA = {
"FBC_overflow_transport.txt": {
"data_product": "Daily averaged kinematic FBC-overflow flux (transport) in Sv",
},
}
@apply_defaults(FBC_DEFAULT_SOURCE, FBC_DEFAULT_FILES)
def read_fbc(
    source: Union[str, Path, None],
    file_list: Union[str, list[str], None],
    transport_only: bool = True,
    data_dir: Union[str, Path, None] = None,
    redownload: bool = False,
    track_added_attrs: bool = False,
) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list]]:
"""Load the FBC (Faroe Banks Channel) transport datasets from a URL or local file path into xarray Datasets.
Parameters
----------
source : str, optional
Local path to the data directory (remote source is handled per-file).
file_list : str or list of str, optional
Filename or list of filenames to process.
Defaults to FBC_DEFAULT_FILES.
transport_only : bool, optional
If True, restrict to transport files only.
data_dir : str, Path or None, optional
Optional local data directory.
redownload : bool, optional
If True, force redownload of the data.
track_added_attrs : bool, optional
If True, track which attributes were added during metadata enrichment.

    Returns
    -------
    list of xr.Dataset
        List of loaded xarray datasets with basic inline and file-specific
        metadata. If ``track_added_attrs`` is True, a tuple of
        ``(datasets, added_attrs_per_dataset)`` is returned instead.

    Raises
    ------
ValueError
If no source is provided for a file and no default URL mapping is found.
FileNotFoundError
If the file cannot be downloaded or does not exist locally.
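
    Examples
    --------
    A minimal usage sketch; it assumes the default remote source at
    FBC_DEFAULT_SOURCE is reachable and the default cache directory is
    writable:

    >>> datasets = read_fbc()
    >>> ds = datasets[0]
    >>> ds["Flux"]  # daily FBC overflow transport in Sv  # doctest: +SKIP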
"""
log.info("Starting to read FBC dataset")
# Load YAML metadata with fallback
global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
DATASOURCE_ID, FBC_METADATA
)
    # Ensure file_list has a default; note that transport_only overrides
    # any user-supplied list with the transport-only file list
    if file_list is None:
        file_list = FBC_DEFAULT_FILES
    if transport_only:
        file_list = FBC_TRANSPORT_FILES
if isinstance(file_list, str):
file_list = [file_list]
# Determine the local storage path
local_data_dir = Path(data_dir) if data_dir else utilities.get_default_data_dir()
local_data_dir.mkdir(parents=True, exist_ok=True)
# Print information about files being loaded
ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, FBC_FILE_METADATA)
datasets = []
added_attrs_per_dataset = [] if track_added_attrs else None
for file in file_list:
        if not file.lower().endswith(".txt"):
            log_warning("Skipping unsupported file type: %s", file)
            continue
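        # Build a download URL only when the source is remote; local paths
        # are resolved directly by resolve_file_path below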
download_url = (
f"{source.rstrip('/')}/{file}" if utilities.is_valid_url(source) else None
)
file_path = utilities.resolve_file_path(
file_name=file,
source=source,
download_url=download_url,
local_data_dir=local_data_dir,
redownload=redownload,
)
        # Open dataset: parse the whitespace-separated ASCII transport file
        if file.lower().endswith(".txt"):
            try:
data_start = utilities.find_data_start(file_path)
df = pd.read_csv(
file_path,
sep=r"\s+",
encoding="latin-1",
skiprows=data_start,
names=["Decimal year", "Month", "Day", "Flux"],
)
except Exception as e:
log_error("Failed to parse ASCII file: %s: %s", file_path, e)
raise FileNotFoundError(
f"Failed to parse ASCII file: {file_path}: {e}"
) from e
            # Time handling: convert decimal years to datetime timestamps
            try:
                # Strip thousands separators and coerce all columns to float
                df = df.apply(
                    lambda col: col.astype(str)
                    .str.replace(",", "", regex=False)
                    .astype(float)
                )
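                # The fractional part of the decimal year is scaled by the
                # actual number of days in that year, so leap years are
                # handled correctly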
df["TIME"] = df["Decimal year"].apply(
lambda x: datetime.datetime(int(x), 1, 1)
+ datetime.timedelta(
days=(x - int(x))
* (
datetime.datetime(int(x) + 1, 1, 1)
- datetime.datetime(int(x), 1, 1)
).days
)
)
df = df.drop(columns=["Decimal year"])
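                # Promote TIME to the index so it becomes the dataset's
                # time coordinate in xarray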
ds = df.set_index("TIME").to_xarray()
except Exception as e:
log_error(
"Failed to convert DataFrame to xarray Dataset for %s: %s",
file,
e,
)
raise ValueError(
f"Failed to convert DataFrame to xarray Dataset for {file}: {e}",
) from e
            # Attach global, YAML-derived, and file-specific metadata,
            # optionally tracking which attributes were added
if track_added_attrs:
ds, attr_changes = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
FBC_FILE_METADATA,
DATASOURCE_ID,
track_added_attrs=True,
)
added_attrs_per_dataset.append(attr_changes)
else:
ds = ReaderUtils.attach_metadata_with_tracking(
ds,
file,
file_path,
global_metadata,
yaml_file_metadata,
FBC_FILE_METADATA,
DATASOURCE_ID,
track_added_attrs=False,
)
datasets.append(ds)
if not datasets:
log_error("No valid FBC files in %s", file_list)
raise FileNotFoundError(f"No valid data files found in {file_list}")
log_info("Successfully loaded %d FBC dataset(s)", len(datasets))
if track_added_attrs:
return datasets, added_attrs_per_dataset
else:
return datasets