Subset SWOT LR L3 Unsmoothed data from AVISO’s THREDDS Data Server
This notebooks explains how to retrieve a geographical subset of unsmoothed (250-m) SWOT LR L3 data on AVISO’s THREDDS Data Server.
L3 Unsmoothed data can be explored at:
Note
Required environment to run this notebook:
xarraynumpysiphonpydapmatplotlib+cartopy
Warning
This tutorial shows how to retrieve datasets via the Opendap protocol of the AVISO’s THREDDS Data Server, that can be time consuming for large datasets. If you need to download large number of half orbits, please use the altimetry_downloader_aviso tool.
Tutorial Objectives
Crawl Aviso’s Thredds Data Server catalogue to find unsmoothed products with cycles/passes numbers using
siphonSubset data intersecting a geographical area using
xarray+pydapOpen data locally using
xarrayVisualise data using
matplotlib
Import + code
[1]:
# Install Cartopy with mamba to avoid discrepancies
# ! mamba install -q -c conda-forge cartopy
[2]:
import warnings
warnings.filterwarnings('ignore')
[3]:
import os
from siphon.catalog import TDSCatalog
from pathlib import Path
import re
import numpy as np
import xarray as xr
from xarray.backends import PydapDataStore
import requests as rq
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
[4]:
def _make_swot_regex(level, variant):
return re.compile(
rf"SWOT_{level}_LR_SSH_{variant}_(\d+)_(\d+)_.*\.nc$"
)
SUBSETS = ["Basic", "Expert", "Unsmoothed", "WindWave", "Technical"]
def _crawl_swot_datasets_pruned(catalog, cycle_min, cycle_max, subset=None, level=0):
"""
Optimised recursive crawl récursif for SWOT catalogs
Args:
subset: subset name. Possible values are: "Basic" | "Expert" | "Unsmoothed" | "WindWave" | "Technical"
cycle_min: minimum cycle
cycle_max: maximum cycle
"""
indent = " " * level
print(f"{indent}Visiting catalog: {catalog.catalog_url}")
for ds in catalog.datasets.values():
yield ds
for ref_name, ref in catalog.catalog_refs.items():
m = re.search(r"cycle_(\d+)", ref_name)
if m:
cycle = int(m.group(1))
if not (cycle_min <= cycle <= cycle_max):
continue
if subset is not None and ref_name in SUBSETS:
if not ref_name == subset:
continue
try:
subcat = ref.follow()
except Exception as e:
continue
yield from _crawl_swot_datasets_pruned(subcat, cycle_min, cycle_max, subset, level=level+1)
def _swot_lr_dataset_filter(dataset, regex, cycle_min, cycle_max, half_orbits):
if not dataset.access_urls or "OPENDAP" not in dataset.access_urls:
return False
if not dataset.url_path:
return False
filename = dataset.url_path.rsplit("/", 1)[-1]
m = regex.match(filename)
if m is None:
return False
cycle_number = int(m.group(1))
pass_number = int(m.group(2))
match = cycle_min <= cycle_number <= cycle_max and pass_number in half_orbits
if match:
print(f"Matched dataset: {filename}")
return match
def retrieve_matching_datasets(url_catalogue, level, subset, cycle_min, cycle_max, half_orbits):
""" Returns the list of datasets's Opendap URLS available in the catalogue, matching required level, subset, half_orbits and cycles range.
Args:
url_catalogue: the catalogue
level: swot LR data level ("L2", "L3")
subset: Swot LR data subset ("Basic", "Expert", "Unsmoothed", "WindWave", "Technical")
cycle_min: minimum cycle
cycle_max: maximum cycle
half_orbits: list of passes numbers
Returns:
The list of matching dataset nodes.
"""
regex = _make_swot_regex(level, subset)
cat = TDSCatalog(url_catalogue)
return [
ds
for ds in _crawl_swot_datasets_pruned(cat, cycle_min, cycle_max, subset)
if _swot_lr_dataset_filter(ds, regex, cycle_min, cycle_max, half_orbits)
]
[5]:
def open_dataset(dataset_url):
""" Open the dataset at dataset_url.
Args:
dataset_url
Returns:
xr.Dataset
"""
session = rq.Session()
try:
store = PydapDataStore.open(dataset_url, session=session, timeout=300, user_charset='UTF-8')
return xr.open_dataset(store)
except Exception as e:
print(f"Something wrong with opendap url {dataset_url}: {e}")
def _open_dataset_coords(dataset_url):
positions_url = dataset_url + '?' + ','.join(["latitude", "longitude", "latitude", "longitude"])
ds_coords = open_dataset(positions_url)
return ds_coords
def _get_indexes(ds, lon_range, lat_range):
mask_left = ((ds["longitude"] >= lon_range[0]) & (ds["longitude"] <= lon_range[1]) &
(ds["latitude"] >= lat_range[0]) & (ds["latitude"] <= lat_range[1]))
mask_right = ((ds["longitude"] >= lon_range[0]) & (ds["longitude"] <= lon_range[1]) &
(ds["latitude"] >= lat_range[0]) & (ds["latitude"] <= lat_range[1]))
mask = (mask_left | mask_right).any('num_pixels')
return np.where(mask)[0][0], np.where(mask)[0][-1]
def _output_dir_prompt():
answer = input('Do you want to write results to Netcdf files? [y/n]')
if not answer or answer[0].lower() != 'y':
return None
return input('Enter existing directory:')
def load_subsets(matching_datasets, variables, lon_range, lat_range, output_dir=None):
""" Loads subsets with variables and lon/lat range, and eventually write them to disk.
Args:
matching_datasets : the datasets nodes in the catalogue
variables : the variables names
lon_range : the longitude range
lat_range : the latitude range
output_dir : the output directory to write the datasets in separated netcdf files
Returns:
xr.Dataset
"""
if not output_dir:
output_dir = _output_dir_prompt()
return [dataset for dataset in [_load_subset(dataset, variables, lon_range, lat_range, output_dir) for dataset in matching_datasets] if dataset is not None]
def _load_subset(dataset_node, variables, lon_range, lat_range, output_dir=None):
output_file = None
if output_dir:
output_file = os.path.join(output_dir, f"subset_{dataset_node.name}")
if os.path.exists(output_file):
print(f"Subset {dataset_node.name} already exists. Reading it...")
return xr.open_dataset(output_file)
# Open the dataset only with coordinates
dataset_url = dataset_node.access_urls["OPENDAP"]
print(f"Reading {dataset_url}...")
ds_positions = _open_dataset_coords(dataset_url)
# Locate indexes of lines matching with the required geographical area
try:
idx_first, idx_last = _get_indexes(ds_positions, lon_range, lat_range)
except IndexError:
return None
# Download subset
dataset = open_dataset(dataset_url)
ds = xr.merge([dataset[var][idx_first:idx_last] for var in variables])
print(f"Load intersection with selected area in dataset {dataset_node.name}: indices ({idx_first}, {idx_last})")
ds.load()
if output_file:
ds.to_netcdf(output_file)
print(f"File {output_file} created.")
print('\n')
return ds
Parameters
Define the variables you want:
[6]:
variables = ['time', 'sigma0', 'ssha_unfiltered', 'quality_flag']
OPTIONAL Define existing output folder to save results:
[7]:
output_dir= Path.home() / "TMP_DATA"
Define the url of the catalog from which you want to extract data
[8]:
url_catalogue="https://tds-odatis.aviso.altimetry.fr/thredds/catalog/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/catalog.html"
level = 'L3'
subset = 'Unsmoothed'
Define the parameters needed to retrieve data:
geographical area
phase: 1day-orbit (Calval) / 21day-orbit (Science)
cycle min, max
list of half-orbits
Note
Passes matching a geographical area and period can be found using this tutorial
[9]:
# Mediterranean sea
lat_range = 25, 50
lon_range = -15, 40
#phase, cycle_min, cycle_max = "calval", 521, 521
#phase, cycle_min, cycle_max = "calval", 400, 600
phase, cycle_min, cycle_max = "science", 10, 12
half_orbits = [1, 3, 14]
Area extraction
Gather datasets in the provided catalogue, matching the required cycles and half_orbits
[10]:
matching_datasets = retrieve_matching_datasets(url_catalogue, level, subset, cycle_min, cycle_max, half_orbits)
'num datasets =', len(matching_datasets)
Visiting catalog: https://tds-odatis.aviso.altimetry.fr/thredds/catalog/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/catalog.xml
Visiting catalog: https://tds-odatis.aviso.altimetry.fr/thredds/catalog/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_010/catalog.xml
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_010_001_20240125T001932_20240125T011059_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_010_003_20240125T020226_20240125T025352_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_010_014_20240125T112821_20240125T121947_v2.0.1.nc
Visiting catalog: https://tds-odatis.aviso.altimetry.fr/thredds/catalog/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_011/catalog.xml
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_011_001_20240214T210437_20240214T215603_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_011_003_20240214T224730_20240214T233857_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_011_014_20240215T081326_20240215T090452_v2.0.1.nc
Visiting catalog: https://tds-odatis.aviso.altimetry.fr/thredds/catalog/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_012/catalog.xml
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_012_001_20240306T174941_20240306T184108_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_012_003_20240306T193234_20240306T202401_v2.0.1.nc
Matched dataset: SWOT_L3_LR_SSH_Unsmoothed_012_014_20240307T045830_20240307T054956_v2.0.1.nc
[10]:
('num datasets =', 9)
[11]:
matching_datasets
[11]:
[SWOT_L3_LR_SSH_Unsmoothed_010_001_20240125T001932_20240125T011059_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_010_003_20240125T020226_20240125T025352_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_010_014_20240125T112821_20240125T121947_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_011_001_20240214T210437_20240214T215603_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_011_003_20240214T224730_20240214T233857_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_011_014_20240215T081326_20240215T090452_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_012_001_20240306T174941_20240306T184108_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_012_003_20240306T193234_20240306T202401_v2.0.1.nc,
SWOT_L3_LR_SSH_Unsmoothed_012_014_20240307T045830_20240307T054956_v2.0.1.nc]
Subset data in the required geographical area
Warning
This operation may take some time : for each dataset, it downloads coordinates, calculates indices, loads subset and eventually writes it to netcdf file.
Set output_dir to None if you don’t want to write subsets to netcdf files.
[12]:
datasets_subsets = load_subsets(matching_datasets, variables, lon_range, lat_range, output_dir)
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_010/SWOT_L3_LR_SSH_Unsmoothed_010_001_20240125T001932_20240125T011059_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_010_001_20240125T001932_20240125T011059_v2.0.1.nc: indices (53134, 65317)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_010_001_20240125T001932_20240125T011059_v2.0.1.nc created.
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_010/SWOT_L3_LR_SSH_Unsmoothed_010_003_20240125T020226_20240125T025352_v2.0.1.nc...
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_010/SWOT_L3_LR_SSH_Unsmoothed_010_014_20240125T112821_20240125T121947_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_010_014_20240125T112821_20240125T121947_v2.0.1.nc: indices (17690, 27247)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_010_014_20240125T112821_20240125T121947_v2.0.1.nc created.
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_011/SWOT_L3_LR_SSH_Unsmoothed_011_001_20240214T210437_20240214T215603_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_011_001_20240214T210437_20240214T215603_v2.0.1.nc: indices (53129, 65311)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_011_001_20240214T210437_20240214T215603_v2.0.1.nc created.
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_011/SWOT_L3_LR_SSH_Unsmoothed_011_003_20240214T224730_20240214T233857_v2.0.1.nc...
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_011/SWOT_L3_LR_SSH_Unsmoothed_011_014_20240215T081326_20240215T090452_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_011_014_20240215T081326_20240215T090452_v2.0.1.nc: indices (17691, 29825)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_011_014_20240215T081326_20240215T090452_v2.0.1.nc created.
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_012/SWOT_L3_LR_SSH_Unsmoothed_012_001_20240306T174941_20240306T184108_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_012_001_20240306T174941_20240306T184108_v2.0.1.nc: indices (53119, 65300)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_012_001_20240306T174941_20240306T184108_v2.0.1.nc created.
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_012/SWOT_L3_LR_SSH_Unsmoothed_012_003_20240306T193234_20240306T202401_v2.0.1.nc...
Reading https://tds-odatis.aviso.altimetry.fr/thredds/dodsC/dataset-l3-swot-karin-nadir-validated/l3_lr_ssh/v2_0_1/Unsmoothed/cycle_012/SWOT_L3_LR_SSH_Unsmoothed_012_014_20240307T045830_20240307T054956_v2.0.1.nc...
Load intersection with selected area in dataset SWOT_L3_LR_SSH_Unsmoothed_012_014_20240307T045830_20240307T054956_v2.0.1.nc: indices (4820, 7982)
File /home/atonneau/TMP_DATA/subset_SWOT_L3_LR_SSH_Unsmoothed_012_014_20240307T045830_20240307T054956_v2.0.1.nc created.
Basic manipulations
Concatenate subsets
all_ds = xr.concat(datasets_subsets, dim='num_lines') all_dsVisualize data
Select one dataset
[13]:
ds = datasets_subsets[0]
[14]:
ds
[14]:
<xarray.Dataset> Size: 209MB
Dimensions: (num_lines: 12183, num_pixels: 519)
Coordinates:
latitude (num_lines, num_pixels) float64 51MB 25.0 25.0 ... 50.0
longitude (num_lines, num_pixels) float64 51MB 3.534 3.537 ... 12.5
Dimensions without coordinates: num_lines, num_pixels
Data variables:
time (num_lines) datetime64[ns] 97kB 2024-01-25T00:52:33.1116...
sigma0 (num_lines, num_pixels) float64 51MB nan nan ... nan nan
ssha_unfiltered (num_lines, num_pixels) float64 51MB nan nan ... nan nan
quality_flag (num_lines, num_pixels) uint8 6MB 102 102 102 ... 102 102
Attributes:
comment: Time of measurement in seconds in the UTC time scale...
leap_second: 0000-00-00T00:00:00Z
long_name: time in UTC
standard_name: time
tai_utc_difference: 37.0
_ChunkSizes: 82242Apply quality flag on Sigma 0
[15]:
ds["sigma0"] = ds.sigma0.where(ds.quality_flag==0)
[16]:
plot_kwargs = dict(
x="longitude",
y="latitude",
cmap="gray_r",
vmin=18,
vmax=37,
)
fig, ax = plt.subplots(figsize=(21, 12), subplot_kw=dict(projection=ccrs.PlateCarree()))
ds.sigma0.plot.pcolormesh(ax=ax, cbar_kwargs={"shrink": 0.3}, **plot_kwargs)
plt.title("Sigma 0")
ax.gridlines(draw_labels=True)
ax.coastlines()
ax.set_extent([6, 11, 35, 45], crs=ccrs.PlateCarree())
[17]:
plot_kwargs = dict(
x="longitude",
y="latitude",
vmin=0,
vmax=0.2,
)
fig, ax = plt.subplots(figsize=(21, 12), subplot_kw=dict(projection=ccrs.PlateCarree()))
ds.ssha_unfiltered.plot.pcolormesh(ax=ax, cbar_kwargs={"shrink": 0.3}, **plot_kwargs)
plt.title('SSHA')
ax.gridlines(draw_labels=True)
ax.coastlines()
ax.set_extent([6, 11, 35, 45], crs=ccrs.PlateCarree())