{ "cells": [ { "cell_type": "markdown", "id": "c6a29764-f39c-431c-8e77-fbc6bfe20f01", "metadata": {}, "source": [
 "# AMOCatlas conversion & compliance checker\n",
 "\n",
 "The purpose of this notebook is to demonstrate the OceanSITES format(s) from `AMOCatlas`.\n",
 "\n",
 "The demo is organised to show\n",
 "\n",
 "- Step 1: Loading and plotting a sample dataset\n",
 "\n",
 "- Step 2: Converting one dataset to a standard format\n",
 "\n",
 "- Step 3: Checking the converted file for compliance with the format specification\n",
 "\n",
 "Note that when you submit a pull request, you should `clear all outputs` from your Python notebook for a cleaner merge."
 ] },
 { "cell_type": "code", "execution_count": 1, "id": "6a1920f3", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:46.698427Z", "iopub.status.busy": "2025-12-16T15:03:46.698240Z", "iopub.status.idle": "2025-12-16T15:03:47.745128Z", "shell.execute_reply": "2025-12-16T15:03:47.744317Z" } }, "outputs": [], "source": [
 "import importlib\n",
 "import os\n",
 "import pathlib\n",
 "import sys\n",
 "\n",
 "# Put the parent of the working directory on sys.path *before* importing\n",
 "# amocatlas, so the package can be found there (e.g. when the notebook is run\n",
 "# from a subdirectory of a checkout without installing the package).\n",
 "script_dir = pathlib.Path.cwd()\n",
 "parent_dir = script_dir.parent\n",
 "sys.path.append(str(parent_dir))\n",
 "\n",
 "import xarray as xr\n",
 "\n",
 "from amocatlas import readers, plotters, standardise, utilities"
 ] },
 { "cell_type": "code", "execution_count": 2, "id": "1e070d18", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:47.747477Z", "iopub.status.busy": "2025-12-16T15:03:47.747149Z", "iopub.status.idle": "2025-12-16T15:03:47.749936Z", "shell.execute_reply": "2025-12-16T15:03:47.749297Z" } }, "outputs": [], "source": [
 "# Directory used by the cells below when writing data files\n",
 "data_path = os.path.join(parent_dir, \"data\")"
 ] },
 { "cell_type": "markdown", "id": "9414445e", "metadata": {}, "source": [
 "### Step 1: Load and plot RAPID 26°N"
 ] },
 { "cell_type": "code", "execution_count": 3, "id": "fd849c48", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:47.751454Z", "iopub.status.busy": "2025-12-16T15:03:47.751295Z", "iopub.status.idle": "2025-12-16T15:03:47.850073Z", 
"shell.execute_reply": "2025-12-16T15:03:47.849058Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary for array 'rapid':\n", "Total datasets loaded: 1\n", "\n", "Dataset 1:\n", " Source file: moc_transports.nc\n", " Dimensions:\n", " - time: 14599\n", " Variables:\n", " - t_therm10: shape (14599,)\n", " - t_aiw10: shape (14599,)\n", " - t_ud10: shape (14599,)\n", " - t_ld10: shape (14599,)\n", " - t_bw10: shape (14599,)\n", " - t_gs10: shape (14599,)\n", " - t_ek10: shape (14599,)\n", " - t_umo10: shape (14599,)\n", " - moc_mar_hc10: shape (14599,)\n", "\n", "Summary for array 'rapid':\n", "Total datasets loaded: 1\n", "\n", "Dataset 1:\n", " Source file: moc_transports.nc\n", " Dimensions:\n", " - time: 14599\n", " Variables:\n", " - t_therm10: shape (14599,)\n", " - t_aiw10: shape (14599,)\n", " - t_ud10: shape (14599,)\n", " - t_ld10: shape (14599,)\n", " - t_bw10: shape (14599,)\n", " - t_gs10: shape (14599,)\n", " - t_ek10: shape (14599,)\n", " - t_umo10: shape (14599,)\n", " - moc_mar_hc10: shape (14599,)\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/runner/micromamba/envs/amocatlas/lib/python3.14/site-packages/xarray/backends/plugins.py:109: RuntimeWarning: Engine 'gmt' loading failed:\n", "Error loading GMT shared library at 'libgmt.so'.\n", "libgmt.so: cannot open shared object file: No such file or directory\n", " external_backend_entrypoints = backends_dict_from_pkg(entrypoints_unique)\n" ] } ], "source": [
 "def standardise_with_source(ds):\n",
 "    \"\"\"Standardise a RAPID dataset, passing its `source_file` attribute as the second argument.\"\"\"\n",
 "    return standardise.standardise_rapid(ds, ds.attrs[\"source_file\"])\n",
 "\n",
 "\n",
 "# Quick start: load the sample data from data/moc_transports\n",
 "ds_rapid = standardise_with_source(readers.load_sample_dataset())\n",
 "\n",
 "# Full dataset: load data from data/moc_transports, then standardise each dataset\n",
 "datasetsRAPID = readers.load_dataset(\"rapid\", transport_only=True)\n",
 "standardRAPID = [standardise_with_source(ds) for ds in datasetsRAPID]"
 ] }, { "cell_type": "code", 
"execution_count": 4, "id": "fb527153", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:47.851687Z", "iopub.status.busy": "2025-12-16T15:03:47.851518Z", "iopub.status.idle": "2025-12-16T15:03:48.138942Z", "shell.execute_reply": "2025-12-16T15:03:48.137894Z" } }, "outputs": [ { "data": { "text/plain": [ "(
,\n", " )" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Plot RAPID timeseries\n",
 "\n",
 "plotters.plot_amoc_timeseries(\n",
 "    data=[standardRAPID[0]],\n",
 "    varnames=[\"moc_mar_hc10\"],\n",
 "    labels=[\"\"],\n",
 "    resample_monthly=True,\n",
 "    plot_raw=True,\n",
 "    title=\"RAPID 26°N\",\n",
 ")"
 ] },
 { "cell_type": "markdown", "id": "2fva7rp084v", "metadata": {}, "source": [
 "### Step 2: Convert to AC1 Format\n",
 "\n",
 "The next step is to convert the standardised dataset to AC1 format, which follows OceanSITES conventions.\n",
 "\n",
 "**Note**: `convert.py` validates that units are present rather than assigning them, so the conversion raises an error if `standardise.py` has not provided proper units (for example on the TIME coordinate). Any such error is caught and reported by the cell below."
 ] },
 { "cell_type": "code", "execution_count": null, "id": "3z0fh11wbpt", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:48.140670Z", "iopub.status.busy": "2025-12-16T15:03:48.140476Z", "iopub.status.idle": "2025-12-16T15:03:48.153147Z", "shell.execute_reply": "2025-12-16T15:03:48.152495Z" } }, "outputs": [], "source": [
 "from amocatlas import convert, writers, compliance_checker\n",
 "\n",
 "# Convert the standardised data to AC1 format, save it, and validate the file\n",
 "print(\"🔄 Attempting to convert RAPID data to AC1 format...\")\n",
 "\n",
 "try:\n",
 "    ac1_datasets = convert.to_AC1(standardRAPID[0])\n",
 "    ac1_ds = ac1_datasets[0]\n",
 "\n",
 "    # The converted dataset has no 'suggested_filename' attribute (reading it\n",
 "    # raised KeyError); build the file name from the global 'id' attribute.\n",
 "    suggested_filename = f\"{ac1_ds.attrs['id']}.nc\"\n",
 "\n",
 "    print(\"✅ Conversion successful!\")\n",
 "    print(f\" Suggested filename: {suggested_filename}\")\n",
 "    # .sizes is the name -> length mapping; dict(ds.dims) is deprecated in xarray\n",
 "    print(f\" Dimensions: {dict(ac1_ds.sizes)}\")\n",
 "    print(f\" Variables: {list(ac1_ds.data_vars.keys())}\")\n",
 "\n",
 "    # Save the dataset\n",
 "    output_file = os.path.join(data_path, suggested_filename)\n",
 "    success = writers.save_dataset(ac1_ds, output_file)\n",
 "\n",
 "    if success:\n",
 "        print(f\"💾 Saved AC1 file: {output_file}\")\n",
 "\n",
 "        # Run compliance check on the file that was just written\n",
 "        print(\"\\n🔍 Running compliance check...\")\n",
 "        result = compliance_checker.validate_ac1_file(output_file)\n",
 "\n",
 "        print(f\"Status: {'✅ PASS' if result.passed else '❌ FAIL'}\")\n",
 "        print(f\"Errors: {len(result.errors)}\")\n",
 "        print(f\"Warnings: {len(result.warnings)}\")\n",
 "\n",
 "        if result.errors:\n",
 "            print(\"\\nFirst few errors:\")\n",
 "            for i, error in enumerate(result.errors[:3], 1):\n",
 "                print(f\" {i}. {error}\")\n",
 "    else:\n",
 "        print(\"❌ Failed to save file\")\n",
 "\n",
 "except Exception as e:\n",
 "    # Include the exception type so that e.g. a KeyError is not mistaken for a units problem\n",
 "    print(f\"❌ Conversion failed: {type(e).__name__}: {e}\")\n",
 "    print(\"\\nIf the error is about missing units: convert.py validates that units are present\")\n",
 "    print(\"rather than assigning them, so they must be provided by standardise.py.\")"
 ] },
 { "cell_type": "code", "execution_count": 6, "id": "0df5ab5b", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:48.154983Z", "iopub.status.busy": "2025-12-16T15:03:48.154796Z", "iopub.status.idle": "2025-12-16T15:03:48.164658Z", "shell.execute_reply": "2025-12-16T15:03:48.163890Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "information is based on xarray Dataset\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AttributeValueDType
0ConventionsCF-1.8, OceanSITES-1.4, ACDD-1.3str
1format_version1.4str
2data_typeOceanSITES time-series datastr
3featureTypetimeSeriesstr
4data_modeDstr
5titleRAPID Atlantic Meridional Overturning Circulat...str
6summaryComponent transport time series from the RAPID...str
7sourceRAPID moored array observationsstr
8site_codeRAPIDstr
9arrayRAPIDstr
10geospatial_lat_min26.5float
11geospatial_lat_max26.5float
12geospatial_lon_min-79.0float
13geospatial_lon_max-13.0float
14platform_codeRAPID26Nstr
15time_coverage_start20040402T000000str
16time_coverage_end20240327T235959str
17contributor_nameBen Moat, Ben Moatstr
18contributor_emailben.moat@noc.ac.uk, ben.moat@noc.ac.ukstr
19contributor_idhttps://orcid.org/0000-0001-8676-7779, https:/...str
20contributor_rolecreator, PIstr
21contributing_institutionsNational Oceanography CentreΒ (Southampton) (UK)str
22contributing_institutions_vocabularyhttps://edmo.seadatanet.org/report/17str
23contributing_institutions_rolestr
24contributing_institutions_role_vocabularystr
25contributor_role_vocabularyhttps://vocab.nerc.ac.uk/collection/W08/current/str
26source_acknowledgementData from the RAPID AMOC observing project is ...str
27licenseCC-BY 4.0str
28doidoi: 10.5285/3f24651e-2d44-dee3-e063-7086abc0395estr
29date_created20251216T150348str
30processing_levelData verified against model or other contextua...str
31commentConverted to AC1 format from moc_transports.nc...str
32naming_authorityAMOCatlasstr
33idOS_RAPID_20040402-20240327_DPR_transports_T12Hstr
34cdm_data_typeTimeSeriesstr
35QC_indicatorexcellentstr
36institutionAMOCatlas Communitystr
\n", "
" ], "text/plain": [ " Attribute \\\n", "0 Conventions \n", "1 format_version \n", "2 data_type \n", "3 featureType \n", "4 data_mode \n", "5 title \n", "6 summary \n", "7 source \n", "8 site_code \n", "9 array \n", "10 geospatial_lat_min \n", "11 geospatial_lat_max \n", "12 geospatial_lon_min \n", "13 geospatial_lon_max \n", "14 platform_code \n", "15 time_coverage_start \n", "16 time_coverage_end \n", "17 contributor_name \n", "18 contributor_email \n", "19 contributor_id \n", "20 contributor_role \n", "21 contributing_institutions \n", "22 contributing_institutions_vocabulary \n", "23 contributing_institutions_role \n", "24 contributing_institutions_role_vocabulary \n", "25 contributor_role_vocabulary \n", "26 source_acknowledgement \n", "27 license \n", "28 doi \n", "29 date_created \n", "30 processing_level \n", "31 comment \n", "32 naming_authority \n", "33 id \n", "34 cdm_data_type \n", "35 QC_indicator \n", "36 institution \n", "\n", " Value DType \n", "0 CF-1.8, OceanSITES-1.4, ACDD-1.3 str \n", "1 1.4 str \n", "2 OceanSITES time-series data str \n", "3 timeSeries str \n", "4 D str \n", "5 RAPID Atlantic Meridional Overturning Circulat... str \n", "6 Component transport time series from the RAPID... str \n", "7 RAPID moored array observations str \n", "8 RAPID str \n", "9 RAPID str \n", "10 26.5 float \n", "11 26.5 float \n", "12 -79.0 float \n", "13 -13.0 float \n", "14 RAPID26N str \n", "15 20040402T000000 str \n", "16 20240327T235959 str \n", "17 Ben Moat, Ben Moat str \n", "18 ben.moat@noc.ac.uk, ben.moat@noc.ac.uk str \n", "19 https://orcid.org/0000-0001-8676-7779, https:/... str \n", "20 creator, PI str \n", "21 National Oceanography CentreΒ (Southampton) (UK) str \n", "22 https://edmo.seadatanet.org/report/17 str \n", "23 str \n", "24 str \n", "25 https://vocab.nerc.ac.uk/collection/W08/current/ str \n", "26 Data from the RAPID AMOC observing project is ... 
str \n", "27 CC-BY 4.0 str \n", "28 doi: 10.5285/3f24651e-2d44-dee3-e063-7086abc0395e str \n", "29 20251216T150348 str \n", "30 Data verified against model or other contextua... str \n", "31 Converted to AC1 format from moc_transports.nc... str \n", "32 AMOCatlas str \n", "33 OS_RAPID_20040402-20240327_DPR_transports_T12H str \n", "34 TimeSeries str \n", "35 excellent str \n", "36 AMOCatlas Community str " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plotters.show_attributes(ac1_ds)" ] },
 { "cell_type": "markdown", "id": "d9kv2x4qlgj", "metadata": {}, "source": [
 "### Demonstration: Working conversion with manual units fix\n",
 "\n",
 "This cell sets the TIME units explicitly on a copy of the standardised dataset (something that would normally be done in `standardise.py`) and then runs the complete convert-and-save workflow:"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "gx3qn1dhq6s", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:48.166378Z", "iopub.status.busy": "2025-12-16T15:03:48.166200Z", "iopub.status.idle": "2025-12-16T15:03:48.211963Z", "shell.execute_reply": "2025-12-16T15:03:48.211137Z" } }, "outputs": [], "source": [
 "import traceback\n",
 "\n",
 "# Set the TIME units on a copy of the standardised dataset before converting\n",
 "# (this would normally be done in standardise.py)\n",
 "demo_ds = standardRAPID[0].copy()\n",
 "demo_ds['TIME'].attrs['units'] = 'seconds since 1970-01-01T00:00:00Z'\n",
 "\n",
 "print(\"🔄 Converting RAPID data to AC1 format (with TIME units fixed)...\")\n",
 "\n",
 "try:\n",
 "    ac1_datasets = convert.to_AC1(demo_ds)\n",
 "    ac1_ds = ac1_datasets[0]\n",
 "\n",
 "    print(\"✅ Conversion successful!\")\n",
 "    print(f\" Suggested filename: {ac1_ds.attrs['id']}.nc\")\n",
 "    print(f\" Dimensions: {dict(ac1_ds.sizes)}\")\n",
 "    print(f\" Variables: {list(ac1_ds.data_vars.keys())}\")\n",
 "    print(f\" TIME units: {ac1_ds.TIME.attrs.get('units')}\")\n",
 "    print(f\" TRANSPORT units: {ac1_ds.TRANSPORT.attrs.get('units')}\")\n",
 "\n",
 "    # Inspect the structure\n",
 "    print(\"\\n📊 Dataset structure:\")\n",
 "    print(f\" TRANSPORT shape: {ac1_ds.TRANSPORT.shape}\")\n",
 "    print(f\" Component names: {list(ac1_ds.TRANSPORT_NAME.values)}\")\n",
 "    print(f\" Global attributes: {len(ac1_ds.attrs)} attributes\")\n",
 "\n",
 "    # Save the dataset using the writers module\n",
 "    output_file = os.path.join(data_path, ac1_ds.attrs['id'] + \".nc\")\n",
 "    print(f\"\\n💾 Saving to: {output_file}\")\n",
 "    success = writers.save_dataset(ac1_ds, output_file)\n",
 "\n",
 "    if success:\n",
 "        print(\"✅ Successfully saved AC1 file!\")\n",
 "\n",
 "        # File size check\n",
 "        file_size = os.path.getsize(output_file)\n",
 "        print(f\" File size: {file_size:,} bytes\")\n",
 "\n",
 "    else:\n",
 "        print(\"❌ Failed to save file\")\n",
 "\n",
 "except Exception as e:\n",
 "    # Show the full traceback as well, so the failing step can be located\n",
 "    print(f\"❌ Conversion failed: {e}\")\n",
 "    traceback.print_exc()"
 ] },
 { "cell_type": "markdown", "id": "sjfqfe2hmu9", "metadata": {}, "source": [ "### Step 3: Compliance 
Checking\n", "\n", "Run the AC1 compliance checker to validate the converted file against the specification:" ] },
 { "cell_type": "code", "execution_count": null, "id": "od4qqe2kz8i", "metadata": { "execution": { "iopub.execute_input": "2025-12-16T15:03:48.213707Z", "iopub.status.busy": "2025-12-16T15:03:48.213528Z", "iopub.status.idle": "2025-12-16T15:03:48.227117Z", "shell.execute_reply": "2025-12-16T15:03:48.226279Z" } }, "outputs": [], "source": [
 "# Run compliance check on the created file.\n",
 "# `output_file` is only defined if a conversion cell above got as far as\n",
 "# building the path, so check for the variable as well as for the file.\n",
 "if 'output_file' in locals() and os.path.exists(output_file):\n",
 "    print(\"🔍 Running AC1 compliance check...\")\n",
 "\n",
 "    result = compliance_checker.validate_ac1_file(output_file)\n",
 "\n",
 "    print(\"\\n📊 Compliance Results:\")\n",
 "    print(f\" Status: {'✅ PASS' if result.passed else '❌ FAIL'}\")\n",
 "    print(f\" File Type: {result.file_type}\")\n",
 "    print(f\" Errors: {len(result.errors)}\")\n",
 "    print(f\" Warnings: {len(result.warnings)}\")\n",
 "\n",
 "    # Only the first five errors are listed; the rest are summarised as a count\n",
 "    if result.errors:\n",
 "        print(f\"\\n❌ Errors ({len(result.errors)} total):\")\n",
 "        for i, error in enumerate(result.errors[:5], 1):\n",
 "            print(f\" {i}. {error}\")\n",
 "        if len(result.errors) > 5:\n",
 "            print(f\" ... and {len(result.errors) - 5} more errors\")\n",
 "\n",
 "    # Only the first three warnings are listed; the rest are summarised as a count\n",
 "    if result.warnings:\n",
 "        print(f\"\\n⚠️ Warnings ({len(result.warnings)} total):\")\n",
 "        for i, warning in enumerate(result.warnings[:3], 1):\n",
 "            print(f\" {i}. {warning}\")\n",
 "        if len(result.warnings) > 3:\n",
 "            print(f\" ... and {len(result.warnings) - 3} more warnings\")\n",
 "\n",
 "    # Show validation categories\n",
 "    print(\"\\n🔧 What the compliance checker validates:\")\n",
 "    print(\" ✓ Filename pattern (OceanSITES conventions)\")\n",
 "    print(\" ✓ Required dimensions and variables\")\n",
 "    print(\" ✓ Variable attributes (units, standard_name, vocabulary)\")\n",
 "    print(\" ✓ Global attributes (conventions, metadata)\")\n",
 "    print(\" ✓ Data value ranges (coordinates, valid_min/max)\")\n",
 "    print(\" ✓ CF convention compliance (dimension ordering)\")\n",
 "\n",
 "else:\n",
 "    print(\"❌ No AC1 file available for compliance checking\")\n",
 "    print(\"Please ensure the conversion step above succeeded first.\")"
 ] } ],
 "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.2" } }, "nbformat": 4, "nbformat_minor": 5 }