Real-time flow forecasts with ECCC weather forecasts

This notebook shows how to perform a streamflow forecast, using ECCC weather forecasts. Generates the forecasts and plots them.

%matplotlib inline

# Import the required packages

import datetime as dt

import fiona
import matplotlib.pyplot as plt
import xarray as xr
from clisops.core import average, subset

from ravenpy import Emulator
from ravenpy.config import commands as rc
from ravenpy.config.emulators import GR4JCN
from ravenpy.extractors.forecasts import get_recent_ECCC_forecast
from ravenpy.utilities import forecasting
from ravenpy.utilities.testdata import get_file, open_dataset
# Define the catchment contour. Here we use the Salmon River file we previously generated using the Delineator
# in Tutorial Notebook 01.
basin_contour = get_file("notebook_inputs/salmon_river.geojson")

# Get the most recent ECCC forecast data from the Geomet extraction tool:
forecast_data = get_recent_ECCC_forecast(
    fiona.open(basin_contour), climate_model="GEPS"
)
display(forecast_data)

# We need to write the forecast data as a file for Raven to be able to access it.
fname = "/tmp/forecast.nc"
forecast_data.to_netcdf(fname)
# Define the warmup period dates. Our weather file ends before the forecast date so our states will not be as
# good as those of a model run operationally.
start_date_wu = dt.datetime(2010, 1, 1)
end_date_wu = dt.datetime(2020, 3, 30)

# Define some catchment properties. Could also be replaced by a call to the properties WPS as in
# the Tutorial Notebook 02.
hru = dict(
    area=4250.6,
    elevation=843.0,
    latitude=54.4848,
    longitude=-123.3659,
    hru_type="land",
)

# Observed weather data for the Salmon river. We extracted this using Tutorial Notebook 03 and the
# salmon_river.geojson file as the contour. Used for the model warm-up.
ts = get_file("notebook_inputs/ERA5_weather_data_Salmon.nc")

# Set alternative names for netCDF variables
alt_names = {
    "TEMP_MIN": "tmin",
    "TEMP_MAX": "tmax",
    "PRECIP": "pr",
}

# Data types to extract from netCDF
data_type = ["TEMP_MAX", "TEMP_MIN", "PRECIP"]
data_kwds = {
    "ALL": {
        "elevation": hru["elevation"],
        "Latitude": hru["latitude"],
        "Longitude": hru["longitude"],
    },
}

# Model configuration
model_config_warmup = GR4JCN(
    params=[0.529, -3.396, 407.29, 1.072, 16.9, 0.947],
    Gauge=[
        rc.Gauge.from_nc(
            ts, data_type=data_type, alt_names=alt_names, data_kwds=data_kwds
        )
    ],
    HRUs=[hru],
    StartDate=start_date_wu,
    EndDate=end_date_wu,
    RunName="ESP_vs_NWP_warmup",
)

# Run the model and get the outputs.
out1 = Emulator(config=model_config_warmup).run()

# Extract the path to the final states file that will be used as the next initial states
hotstart = out1.files["solution"]
# Length of the desired forecast, in days
duration = 7

# We need to adjust the data_type and alt_names according to the data in the forecast:
# Set alternative names for netCDF variables
alt_names = {
    "TEMP_AVE": "tas",
    "PRECIP": "pr",
}

# Data types to extract from netCDF
data_type = ["TEMP_AVE", "PRECIP"]

# We will need to reuse this for GR4J. Update according to your needs. For example, here we will also pass
# the catchment latitude and longitude as our CaSPAr data has been averaged at the catchment scale.
# We also need to tell the model to deaccumulate the precipitation and shift it in time by 6 hours for our
# catchment (UTC timezones):
data_kwds = {
    "ALL": {
        "elevation": hru["elevation"],
        "Latitude": hru["latitude"],
        "Longitude": hru["longitude"],
    },
    "PRECIP": {
        "Deaccumulate": True,
        "TimeShift": -0.25,
        "LinearTransform": {
            "scale": 1.0
        },  # Since we are deaccumulating, we need to manually specify scale.
    },  # We are already in mm, so leave it like so (scale = 1.0).
    "TEMP_AVE": {
        "TimeShift": -0.25,
    },
}

# ECCC forecast time format is a bit complex to work with, so we will use cftime to make it more manageable.
fcst_tmp = open_dataset(fname, use_cftime=True)

# Get the first timestep that will be used for the model simulation
start_date = fcst_tmp.time.data[0] + dt.timedelta(days=1)

# Model configuration for forecasting, including correct start date and forecast duration and initial state
model_config_fcst = GR4JCN(
    params=[0.529, -3.396, 407.29, 1.072, 16.9, 0.947],
    Gauge=[
        rc.Gauge.from_nc(
            fname, data_type=data_type, alt_names=alt_names, data_kwds=data_kwds
        )
    ],
    HRUs=[hru],
    StartDate=start_date,
    Duration=duration,
    RunName="Realtime_forecast_NB",
).set_solution(hotstart, timestamp=False)
# Generate the forecast by providing all necessary information to generate virtual stations representing
# the forecast members. Note that we are using the hindcasting tools, becasue there is effectively no difference
# between operational hindcasting and operational forecasting except for the forecast issue time and data
# availability, which we solved by using the most recent ECCC forecasts with a warmed-up model and hotstart file.

forecast_sims = forecasting.hindcast_from_meteo_forecast(
    model_config_fcst,
    forecast=fname,
    ens_dim="members",
    # We also need to provide the necessary information to create gauges inside the forecasting model:
    data_kwds=data_kwds,
    data_type=data_type,
    alt_names=alt_names,
)

display(forecast_sims.hydrograph)

And, for visual representation of the forecasts:

# Simulate an observed streamflow timeseries: Here we take a member from the ensemble, but you should use your own
# observed timeseries:
qq = forecast_sims.hydrograph.q_sim[0, :, 0]

# This is to be replaced with a call to the forecast graphing WPS as soon as merged.
# model.q_sim.plot.line("b", x="time")
forecast_sims.hydrograph.q_sim[:, :, 0].plot.line("b", x="time", add_legend=False)
forecast_sims.hydrograph.q_sim[1, :, 0].plot.line("b", x="time", label="forecasts")
qq.plot.line("r", x="time", label="observations")
plt.legend(loc="upper left")
plt.show()