09 Dask demo

09 Dask demo

UW Geospatial Data Analysis
CEE467/CEWA567
David Shean, Eric Gagliano, Quinn Brencher

from pathlib import Path
import xarray as xr
import os
import time
from dask.distributed import Client
# Start a Dask client; with no arguments this creates a local cluster.
client = Client()

# Local machine: displaying `client` (last line of this cell) is enough --
# the dashboard link in its summary can be opened directly.
#
# JupyterHub server: the dashboard has to be reached through the hub proxy,
# so build the proxied URL from the hub base path
# (e.g. /2026-winter-cee-467-a/user/gbrench/) and the dashboard port.
base_url = os.environ.get("JUPYTERHUB_SERVICE_PREFIX", "/")

# The port is the text between the last ":" of the default link and the
# first "/" that follows it.
dashboard_link = client.dashboard_link
port = dashboard_link.rsplit(":", 1)[-1].partition("/")[0]

# Proxied dashboard URL served under the hub prefix.
proxied_link = f"{base_url}proxy/{port}/status"

print("Dask dashboard:", proxied_link, sep="\n")

client
Dask dashboard:
/2026-winter-cee-467-a/user/gbrench/proxy/8787/status

Client

Client-472a4806-1667-11f1-831d-e20dcd31b48f

Connection method: Cluster object Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status

Cluster Info

# Directory holding the ERA5 demo data, located under the user's home directory.
era5_data_dir = str(Path.home()) + '/gda_demo_data/era5_data'
# Full path to the monthly anomaly NetCDF file used throughout this demo.
anom_fn = os.path.join(era5_data_dir, '1month_anomaly_Global_ea_2t.nc')
def coarsen_and_polyfit(ds, coarsen_factor=2):
    """Block-average a lat/lon grid, then fit a linear trend along time.

    Steps, in order:
      1. Wrap the ``longitude`` coordinate into [-180, 180) and sort by it.
      2. Average over windows of ``coarsen_factor`` cells along both
         ``latitude`` and ``longitude``; cells left over at the edge that do
         not fill a whole window are trimmed.
      3. Fit a degree-1 polynomial along the ``time`` dimension.

    Args:
        ds: Object with ``longitude``, ``latitude`` and ``time`` dimensions
            (an xarray Dataset in this notebook).
        coarsen_factor: Window size, in grid cells, used for both spatial
            dimensions.

    Returns:
        Whatever ``polyfit`` returns for the coarsened data.
    """
    wrapped_lon = ((ds.longitude + 180) % 360) - 180
    recentred = ds.assign_coords(longitude=wrapped_lon).sortby('longitude')

    windows = {'latitude': coarsen_factor, 'longitude': coarsen_factor}
    block_means = recentred.coarsen(boundary='trim', **windows).mean()

    return block_means.polyfit(dim='time', deg=1)
# Open the anomaly file with chunks=None, i.e. without wrapping the variables
# in dask arrays. This is the baseline for the timing comparison below.
anom_nochunks_ds = xr.open_dataset(anom_fn, chunks=None)
# Display the dataset summary.
anom_nochunks_ds
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 517, latitude: 721, longitude: 1440)
Coordinates:
  * time       (time) datetime64[ns] 4kB 1979-01-01 1979-02-01 ... 2022-01-01
  * latitude   (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * longitude  (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
Data variables:
    t2m        (time, latitude, longitude) float32 2GB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2022-02-28T07:59 GRIB to CDM+CF via cfgrib-0.9.1...
# Time the coarsen + trend fit on the dataset opened without dask chunks.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring elapsed intervals; time() follows
# the system wall clock and can jump if that clock is adjusted mid-run.
start = time.perf_counter()
# The result is discarded -- only the elapsed time is of interest here.
coarsen_and_polyfit(anom_nochunks_ds, coarsen_factor=4)
end = time.perf_counter()
print(f"Time taken without chunks: {end-start:.2f} seconds")
Time taken without chunks: 26.03 seconds
# Drop the reference to the un-chunked dataset before opening the chunked one
# (presumably so any memory it holds can be reclaimed).
anom_nochunks_ds = None

# Re-open the same file backed by dask arrays. "time": -1 keeps the whole time
# axis in a single chunk, while latitude/longitude are split into 180 x 360 tiles.
anom_chunks_ds = xr.open_dataset(
    anom_fn,
    chunks={"time": -1, "latitude": 180, "longitude": 360},
)
# Display the dataset summary.
anom_chunks_ds
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 517, latitude: 721, longitude: 1440)
Coordinates:
  * time       (time) datetime64[ns] 4kB 1979-01-01 1979-02-01 ... 2022-01-01
  * latitude   (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * longitude  (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
Data variables:
    t2m        (time, latitude, longitude) float32 2GB dask.array<chunksize=(517, 180, 360), meta=np.ndarray>
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2022-02-28T07:59 GRIB to CDM+CF via cfgrib-0.9.1...
# Time the same coarsen + trend fit on the dask-chunked dataset.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring elapsed intervals; time() follows
# the system wall clock and can jump if that clock is adjusted mid-run.
start = time.perf_counter()
# With dask-backed data the pipeline is lazy; .compute() forces evaluation so
# the measured interval covers the actual work.
coarsen_and_polyfit(anom_chunks_ds, coarsen_factor=4).compute()
end = time.perf_counter()
print(f"Time taken with chunks: {end-start:.2f} seconds")
Time taken with chunks: 10.59 seconds