Source code for ravenpy.extractors.routing_product

import os
import warnings
from collections import defaultdict
from pathlib import Path
from typing import List, Union

import pandas

from ravenpy.utilities import gis_import_error_message

try:
    import geopandas
    from osgeo import __version__ as osgeo_version
    from osgeo import ogr, osr
    from shapely import wkt

except (ImportError, ModuleNotFoundError) as e:
    msg = gis_import_error_message.format(Path(__file__).stem)
    raise ImportError(msg) from e

import netCDF4 as nc4
import numpy as np


def open_shapefile(path: Union[str, os.PathLike]):
    """Return GeoDataFrame from shapefile path."""
    if isinstance(path, (Path, str)):
        if Path(path).suffix == ".zip":
            path = f"zip://{path}"
        df = geopandas.read_file(path)
    elif isinstance(path, geopandas.GeoDataFrame):
        df = path
    else:
        raise NotImplementedError(path)
    return df

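# Usage sketch (illustrative, not part of the library): open_shapefile accepts a plain
# shapefile path, a zipped shapefile (read through the "zip://" scheme), or an
# already-open GeoDataFrame, which passes through unchanged. The file name below is a
# placeholder, not a file shipped with ravenpy.
def _example_open_shapefile(path="routing_product.zip"):
    gdf = open_shapefile(path)
    return open_shapefile(gdf)  # GeoDataFrame input is returned as-is
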
class BasinMakerExtractor:
    """Encapsulate the logic of converting the Routing Product into the data structures required to generate the RVH file format.

    Parameters
    ----------
    df : GeoDataFrame
        Sub-basin information.
    hru_aspect_convention : {"GRASS", "ArcGIS"}
        How sub-basin aspect is defined.
    routing_product_version : {"2.1", "1.0"}
        Version of the BasinMaker data.
    """

    MAX_RIVER_SLOPE = 0.00001
    USE_LAKE_AS_GAUGE = False
    WEIR_COEFFICIENT = 0.6
    USE_LAND_AS_GAUGE = False
    USE_MANNING_COEFF = False
    MANNING_DEFAULT = 0.035
    HRU_ASPECT_CONVENTION = "GRASS"  # GRASS | ArcGIS
    ROUTING_PRODUCT_VERSION = "2.1"  # 1.0 | 2.1

    def __init__(
        self,
        df,
        hru_aspect_convention=HRU_ASPECT_CONVENTION,
        routing_product_version=ROUTING_PRODUCT_VERSION,
    ):
        self._df = df
        self.hru_aspect_convention = hru_aspect_convention
        self.routing_product_version = routing_product_version

    def extract(self, hru_from_sb: bool = False) -> dict:
        """Extract data from the Routing Product shapefile and return dictionaries that can be parsed into Raven Commands.

        Parameters
        ----------
        hru_from_sb : bool
            If True, draw HRU information from sub-basin information. This is likely to yield crude results.

        Returns
        -------
        dict
            "sub_basins"
                Sequence of dictionaries with `SubBasin` attributes.
            "sub_basin_group"
                Sequence of dictionaries with `SubBasinGroup` attributes.
            "reservoirs"
                Sequence of dictionaries with `Reservoir` attributes.
            "channel_profile"
                Sequence of dictionaries with `ChannelProfile` attributes.
            "hrus"
                Sequence of dictionaries with `HRU` attributes.
        """
        # As one sub-basin can be mapped to many rows (HRUs), we use a dict
        # keyed by sub-basin ID, which we transform into a list at the end.
        subbasin_recs = {}
        land_sb_ids = []
        lake_sb_ids = []
        reservoirs = []  # these are meant to be injected inline in the RVH
        channel_profiles = []  # these are meant to be injected inline in the RVP
        hru_recs = []  # each row corresponds to a HRU

        # Collect all subbasin_ids for fast lookup in the next loop
        subbasin_ids = {int(row["SubId"]) for _, row in self._df.iterrows()}

        for _, row in self._df.iterrows():
            # HRU
            if hru_from_sb:
                hru_recs.append(self._extract_hru_from_sb(row))
            else:
                hru_recs.append(self._extract_hru(row))

            subbasin_id = int(row["SubId"])

            is_lake = row["Lake_Cat"] > 0
            if not hru_from_sb:
                is_lake = is_lake and row["HRU_IsLake"] > 0

            if is_lake:
                lake_sb_ids.append(subbasin_id)
                reservoirs.append(self._extract_reservoir(row))
            else:
                land_sb_ids.append(subbasin_id)

            # Subbasin
            sb = self._extract_subbasin(row, is_lake, subbasin_ids)
            if sb["subbasin_id"] not in subbasin_recs:
                subbasin_recs[sb["subbasin_id"]] = sb

            # ChannelProfile
            channel_profiles.append(self._extract_channel_profile(row))

        return dict(
            sub_basins=list(subbasin_recs.values()),
            sub_basin_group=[
                {"name": "Land", "sb_ids": land_sb_ids},
                {"name": "Lakes", "sb_ids": lake_sb_ids},
            ],
            reservoirs=reservoirs,
            channel_profile=channel_profiles,
            hrus=hru_recs,
        )

    def _extract_subbasin(self, row, is_lake, subbasin_ids) -> dict:
        subbasin_id = int(row["SubId"])
        # is_lake = row["HRU_IsLake"] >= 0
        riv_length_field = (
            "Rivlen" if self.routing_product_version == "1.0" else "RivLength"
        )
        river_length_in_kms = 0 if is_lake else round(row[riv_length_field] / 1000, 5)
        # river_slope = max(
        #     row["RivSlope"], RoutingProductShapefileExtractor.MAX_RIVER_SLOPE
        # )

        # downstream_id
        downstream_id = int(row["DowSubId"])
        if downstream_id == subbasin_id:
            downstream_id = -1
        elif downstream_id not in subbasin_ids:
            downstream_id = -1

        has_gauge_field = (
            "IsObs" if self.routing_product_version == "1.0" else "Has_Gauge"
        )
        gauged = row[has_gauge_field] > 0 or (
            is_lake and BasinMakerExtractor.USE_LAKE_AS_GAUGE
        )
        gauge_id = row["Obs_NM"] if gauged else ""

        return dict(
            subbasin_id=subbasin_id,
            name=f"sub_{subbasin_id}",
            downstream_id=downstream_id,
            profile=f"chn_{subbasin_id}",
            reach_length=river_length_in_kms,
            gauged=gauged,
            gauge_id=gauge_id,
        )

    @staticmethod
    def _extract_reservoir(row) -> dict:
        lake_id = int(row["HyLakeId"])

        return dict(
            subbasin_id=int(row["SubId"]),
            hru_id=int(row.get("HRU_ID", row["SubId"])),
            name=f"Lake_{lake_id}",
            weir_coefficient=BasinMakerExtractor.WEIR_COEFFICIENT,
            crest_width=row["BkfWidth"],
            max_depth=row["LakeDepth"],
            lake_area=row["LakeArea"],
        )

    @staticmethod
    def _extract_channel_profile(row) -> dict:
        subbasin_id = int(row["SubId"])
        slope = max(row["RivSlope"], BasinMakerExtractor.MAX_RIVER_SLOPE)

        # SWAT: top width of channel when filled with water; bankfull width W_bnkfull
        channel_width = max(row["BkfWidth"], 1)
        # SWAT: depth of water in channel when filled to top of bank
        channel_depth = max(row["BkfDepth"], 1)
        channel_elev = row["MeanElev"]
        floodn = row["FloodP_n"]
        channeln = row["Ch_n"]

        # Channel profile calculations are based on the theory underlying the SWAT model.
        # See: https://swat.tamu.edu/media/99192/swat2009-theory.pdf
        # --> "Channel Characteristics", p. 429 ff.

        # inverse of channel side slope; channel sides assumed to have 2:1 run to rise ratio
        zch = 2
        # river side width
        sidwd = zch * channel_depth
        # river bottom width W_btm
        botwd = channel_width - 2 * sidwd

        # if derived bottom width is negative, set bottom width to 0.5*bankfull width and recalculate zch
        if botwd < 0:
            botwd = 0.5 * channel_width
            sidwd = 0.5 * 0.5 * channel_width
            zch = (channel_width - botwd) / (2 * channel_depth)

        # inverse of floodplain side slope; flood plain side slopes assumed to have 4:1 run to rise ratio
        zfld = 4 + channel_elev
        # floodplain bottom width
        zbot = channel_elev - channel_depth
        # floodplain side width
        sidwdfp = 4 / 0.25

        # geometry of the channel and floodplain
        # (see figure 7:1-2 in SWAT theory document)
        survey_points = (
            (0, zfld),
            (sidwdfp, channel_elev),
            (sidwdfp + 2 * channel_width, channel_elev),
            (sidwdfp + 2 * channel_width + sidwd, zbot),
            (sidwdfp + 2 * channel_width + sidwd + botwd, zbot),
            (sidwdfp + 2 * channel_width + 2 * sidwd + botwd, channel_elev),
            (sidwdfp + 4 * channel_width + 2 * sidwd + botwd, channel_elev),
            (2 * sidwdfp + 4 * channel_width + 2 * sidwd + botwd, zfld),
        )

        if BasinMakerExtractor.USE_MANNING_COEFF:
            mann = channeln
        else:
            mann = BasinMakerExtractor.MANNING_DEFAULT

        # roughness zones of channel and floodplain
        roughness_zones = (
            (0, floodn),
            (sidwdfp + 2 * channel_width, mann),
            (sidwdfp + 2 * channel_width + 2 * sidwd + botwd, floodn),
        )

        return dict(
            name=f"chn_{subbasin_id}",
            bed_slope=slope,
            survey_points=survey_points,
            roughness_zones=roughness_zones,
        )

    def _extract_hru_from_sb(self, row) -> dict:
        """Here we assume one HRU per sub-basin."""
        aspect = row["BasAspect"]

        if self.hru_aspect_convention == "GRASS":
            aspect -= 360
            if aspect < 0:
                aspect += 360
        elif self.hru_aspect_convention == "ArcGIS":
            aspect = 360 - aspect
        else:
            assert False

        return dict(
            hru_id=int(row["SubId"]),
            area=row["BasArea"] / 1e6,
            elevation=row["MeanElev"],
            latitude=row["centroid_y"],
            longitude=row["centroid_x"],
            subbasin_id=int(row["SubId"]),
            slope=row["BasSlope"],
            aspect=aspect,
            hru_type="lake" if row["Lake_Cat"] == 1 else "land",
        )

    def _extract_hru(self, row) -> dict:
        aspect = row["HRU_A_mean"]

        if self.hru_aspect_convention == "GRASS":
            aspect -= 360
            if aspect < 0:
                aspect += 360
        elif self.hru_aspect_convention == "ArcGIS":
            aspect = 360 - aspect
        else:
            assert False

        return dict(
            hru_id=int(row["HRU_ID"]),
            area=row["HRU_Area"] / 1_000_000,
            elevation=row["HRU_E_mean"],
            latitude=row["HRU_CenY"],
            longitude=row["HRU_CenX"],
            subbasin_id=int(row["SubId"]),
            aquifer_profile="[NONE]",
            terrain_class="[NONE]",
            land_use_class=row["LAND_USE_C"],
            veg_class=row["VEG_C"],
            soil_profile=row["SOIL_PROF"],
            slope=row["HRU_S_mean"],
            aspect=aspect,
        )

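# Usage sketch (illustrative, not part of the library): build the RVH/RVP data
# structures from a BasinMaker v2.1 routing-product shapefile. The file name is a
# placeholder; any shapefile or zip archive carrying the expected attributes works.
def _example_basinmaker_extraction(shp_path="finalcat_hru_info.zip"):
    df = open_shapefile(shp_path)
    extractor = BasinMakerExtractor(df, routing_product_version="2.1")
    config = extractor.extract(hru_from_sb=False)
    # config["sub_basins"], config["hrus"], config["reservoirs"], etc. are lists of
    # dictionaries meant to be passed to the corresponding Raven commands.
    return config
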
class GridWeightExtractor:
    """Class to extract grid weights.

    Notes
    -----
    The original version of this algorithm can be found at: https://github.com/julemai/GridWeightsGenerator
    """

    DIM_NAMES = ("lon_dim", "lat_dim")
    VAR_NAMES = ("longitude", "latitude")
    ROUTING_ID_FIELD = "SubId"
    NETCDF_INPUT_FIELD = "NetCDF_col"
    AREA_ERROR_THRESHOLD = 0.05
    CRS_LLDEG = 4326  # EPSG id of lat/lon (deg) coordinate reference system (CRS)
    CRS_CAEA = 3573  # EPSG id of equal-area coordinate reference system (CRS)

    def __init__(
        self,
        input_file_path,
        routing_file_path,
        dim_names=DIM_NAMES,
        var_names=VAR_NAMES,
        routing_id_field=ROUTING_ID_FIELD,
        netcdf_input_field=NETCDF_INPUT_FIELD,
        gauge_ids=None,
        sub_ids=None,
        area_error_threshold=AREA_ERROR_THRESHOLD,
    ):
        """
        Parameters
        ----------
        input_file_path : Path or str
            NetCDF file or shapefile with the data to be weighted.
        routing_file_path : Path or str
            Sub-basin delineation.
        """
        self._dim_names = tuple(dim_names)
        self._var_names = tuple(var_names)
        self._routing_id_field = routing_id_field
        self._netcdf_input_field = netcdf_input_field
        self._gauge_ids = gauge_ids or []
        self._sub_ids = sub_ids or []
        self._area_error_threshold = area_error_threshold

        assert not (
            self._gauge_ids and self._sub_ids
        ), "Only one of gauge_ids or sub_ids can be specified"

        input_file_path = Path(input_file_path)
        if input_file_path.suffix == ".nc":
            # Note that we cannot use xarray because it complains about variables and
            # dimensions having the same name.
            self._input_data = nc4.Dataset(input_file_path)
            self._input_is_netcdf = True
        elif input_file_path.suffix in [".zip", ".shp"]:
            self._input_data = open_shapefile(input_file_path)
            self._input_is_netcdf = False
        else:
            raise ValueError(
                "The input file must be a shapefile (.shp or .zip) or NetCDF"
            )

        self._routing_data = open_shapefile(routing_file_path)

    def extract(self) -> dict:
        """Return dictionary to create a GridWeights command."""
        self._prepare_input_data()

        # Read routing data
        # WGS 84 / North Pole LAEA Canada
        self._routing_data = self._routing_data.to_crs(
            epsg=GridWeightExtractor.CRS_CAEA
        )

        def keep_only_valid_downsubid_and_obs_nm(g):
            """Select the representative row of a group of routing rows sharing the same HRU_ID.

            If there is only one row, there is nothing to do (we return it). If there is
            more than one row, we return the one with DowSubId != -1 (if more than one
            such row exists, we emit a warning). We also search for an Obs_NM value
            different from -9999 in any row of the group, and if we find one we set it
            in the corresponding field of the returned row (if we can't find one, we
            also emit a warning).
            """
            if len(g) == 1:
                return g
            rid = self._routing_id_field
            row = g[g["DowSubId"] != -1].copy()
            if len(row) > 1:
                row = row[:1].copy()
                warnings.warn(
                    f"More than one row with ID={row[rid]} having DowSubId != -1"
                )
            obs_nm = g[g["Obs_NM"] != -9999]
            if not obs_nm.empty:
                row["Obs_NM"] = obs_nm["Obs_NM"].iloc[0]
            else:
                warnings.warn(
                    f"All values of Obs_NM are -9999 for rows with ID={row[rid]}"
                )
            return row

        # Remove duplicate HRU_IDs while making sure that we keep relevant DowSubId and Obs_NM values
        self._routing_data = self._routing_data.groupby(
            self._routing_id_field, group_keys=False
        ).apply(keep_only_valid_downsubid_and_obs_nm)

        # Make sure those are ints
        self._routing_data.SubId = self._routing_data.SubId.astype(int)
        self._routing_data.DowSubId = self._routing_data.DowSubId.astype(int)

        if self._gauge_ids:
            # Extract the SubIds of the gauges that were specified at input
            self._sub_ids = (
                self._routing_data.loc[self._routing_data.Obs_NM.isin(self._gauge_ids)]
                .SubId.unique()
                .tolist()
            )
            if not self._sub_ids:
                raise ValueError(
                    f"No shapes were found with gauge ID (Obs_NM) in {self._gauge_ids}"
                )

        if self._sub_ids:
            # Here we want to extract the network of connected sub-basins by going upstream
            # via their DowSubId, starting from the list supplied by the user (either
            # directly, or via their gauge IDs). We first build a map of DowSubId -> SubIds
            # for efficient lookup.
            downsubid_to_subids = defaultdict(set)
            for _, r in self._routing_data.iterrows():
                downsubid_to_subids[r.DowSubId].add(r.SubId)

            expanded_sub_ids = set(self._sub_ids)
            prev = expanded_sub_ids.copy()
            while True:
                curr = set()
                for did in prev:
                    curr |= downsubid_to_subids[did]
                if curr:
                    expanded_sub_ids |= curr
                    prev = curr
                else:
                    break
            self._sub_ids = expanded_sub_ids

        # Reduce the initial dataset with the target Sub IDs
        if self._sub_ids:
            self._routing_data = self._routing_data[
                self._routing_data.SubId.isin(self._sub_ids)
            ]

        # -------------------------------
        # Construct all grid cell polygons
        # -------------------------------
        grid_cell_geom_gpd_wkt = self._compute_grid_cell_polygons()

        # -------------------------------
        # Derive overlay and calculate weights
        # -------------------------------
        grid_weights = []

        for _, row in self._routing_data.iterrows():
            poly = ogr.CreateGeometryFromWkt(wkt.dumps(row.geometry))
            area_basin = poly.Area()
            # bounding box around basin (for easy check of proximity)
            enve_basin = poly.GetEnvelope()

            area_all = 0.0
            # ncells = 0
            row_grid_weights = []

            for ilat in range(self._nlat):
                for ilon in range(self._nlon):
                    # bounding box around grid-cell (for easy check of proximity)
                    enve_gridcell = grid_cell_geom_gpd_wkt[ilat][ilon].GetEnvelope()
                    grid_is_close = self._check_proximity_of_envelops(
                        enve_gridcell, enve_basin
                    )

                    # this check decreases runtime DRASTICALLY (from ~6h to ~1min)
                    if not grid_is_close:
                        continue

                    # grid_cell_area = grid_cell_geom_gpd_wkt[ilat][ilon].Area()

                    # "fake" buffer to avoid invalid polygons and weirdos dumped by ArcGIS
                    inter = grid_cell_geom_gpd_wkt[ilat][ilon].Intersection(
                        poly.Buffer(0.0)
                    )

                    area_intersect = inter.Area()
                    area_all += area_intersect

                    if area_intersect > 0:
                        hru_id = int(row[self._routing_id_field])
                        cell_id = ilat * self._nlon + ilon
                        weight = area_intersect / area_basin
                        row_grid_weights.append((hru_id, cell_id, weight))

            # mismatch between area of subbasin (routing product) and sum of all contributions of grid cells (model output)
            error = (area_basin - area_all) / area_basin

            if abs(error) > self._area_error_threshold and area_basin > 500000.0:
                # keep uncorrected weights for basins with errors larger than the
                # threshold (default 5%), if the basin is larger than 0.5 km2
                # error_dict[int(ibasin[key_colname])] = [error, area_basin]
                grid_weights += row_grid_weights
            else:
                # adjust such that weights sum up to 1.0
                for hru_id, cell_id, weight in row_grid_weights:
                    corrected_weight = weight * 1.0 / (1.0 - error)
                    grid_weights.append((hru_id, cell_id, corrected_weight))
                # if error < 1.0:
                #     area_all *= 1.0 / (1.0 - error)
                #     error = 0.0

        return dict(
            number_hrus=len(self._routing_data),
            number_grid_cells=self._nlon * self._nlat,
            data=tuple(grid_weights),
        )

    def _prepare_input_data(self):
        if self._input_is_netcdf:
            # Raven numbering is:
            #
            #     [      1      2      3   ...     1*nlon
            #       nlon+1 nlon+2 nlon+3   ...     2*nlon
            #          ...    ...    ...   ...        ...
            #          ...    ...    ...   ...  nlat*nlon ]
            #
            # --> Making sure shape of lat/lon fields is like that
            #
            # TODO: Replace with cf_xarray logic

            lon_var = self._input_data.variables[self._var_names[0]]
            lon_dims = lon_var.dimensions
            lon_var = lon_var[:]

            lat_var = self._input_data.variables[self._var_names[1]]
            lat_dims = lat_var.dimensions
            lat_var = lat_var[:]

            if len(lon_dims) == 2 and len(lat_dims) == 2:
                if lon_dims == self._dim_names:
                    lon_var = np.transpose(lon_var)
                if lat_dims == self._dim_names:
                    lat_var = np.transpose(lat_var)
            elif len(lon_dims) == 1 and len(lat_dims) == 1:
                # Coord vars are 1D, make them 2D
                lon_var_size = lon_var.size
                lon_var = np.tile(lon_var, (lat_var.size, 1))
                lat_var = np.transpose(np.tile(lat_var, (lon_var_size, 1)))
                assert lon_var.shape == lat_var.shape
            else:
                raise ValueError(
                    "The coord variables of the input data must have the same number of dimensions (either 1 or 2)"
                )

            self._lath, self._lonh = self._create_gridcells_from_centers(
                lat_var, lon_var
            )

            self._nlon = np.shape(lon_var)[1]
            self._nlat = np.shape(lat_var)[0]

        else:
            # Input data is a shapefile
            self._input_data = self._input_data.to_crs(
                epsg=GridWeightExtractor.CRS_CAEA
            )

            self._nlon = 1  # only for consistency
            # number of shapes in model "discretization" shapefile (not routing toolbox shapefile)
            self._nlat = self._input_data.geometry.count()  # only for consistency

    def _compute_grid_cell_polygons(self):
        grid_cell_geom_gpd_wkt: List[List[List[ogr.Geometry]]] = [
            [[] for ilon in range(self._nlon)] for ilat in range(self._nlat)
        ]

        if self._input_is_netcdf:
            lath = self._lath
            lonh = self._lonh

            for ilat in range(self._nlat):
                for ilon in range(self._nlon):
                    # -------------------------
                    # EPSG:3035 needs a swap before and after transform ...
                    # -------------------------
                    # gridcell_edges = [ [lath[ilat,ilon]    , lonh[ilat, ilon]   ],  # for some reason need to switch lat/lon that transform works
                    #                    [lath[ilat+1,ilon]  , lonh[ilat+1,ilon]  ],
                    #                    [lath[ilat+1,ilon+1], lonh[ilat+1,ilon+1]],
                    #                    [lath[ilat,ilon+1]  , lonh[ilat, ilon+1] ]]
                    # tmp = self._routing_data_to_geometry(gridcell_edges, epsg=crs_caea)
                    # tmp.SwapXY()  # switch lat/lon back
                    # grid_cell_geom_gpd_wkt[ilat][ilon] = tmp

                    # -------------------------
                    # EPSG:3573 does not need a swap after transform ... and is much
                    # faster than transform with EPSG:3035
                    # -------------------------
                    #
                    # Windows            Python 3.8.5 GDAL 3.1.3 --> lat/lon (Ming)
                    # MacOS 10.15.6      Python 3.8.5 GDAL 3.1.3 --> lat/lon (Julie)
                    # Graham             Python 3.8.2 GDAL 3.0.4 --> lat/lon (Julie)
                    # Graham             Python 3.6.3 GDAL 2.2.1 --> lon/lat (Julie)
                    # Ubuntu 18.04.2 LTS Python 3.6.8 GDAL 2.2.3 --> lon/lat (Etienne)
                    #
                    if osgeo_version < "3.0":
                        gridcell_edges = [
                            [
                                lonh[ilat, ilon],
                                lath[ilat, ilon],
                            ],  # for some reason need to switch lat/lon so that the transform works
                            [lonh[ilat + 1, ilon], lath[ilat + 1, ilon]],
                            [lonh[ilat + 1, ilon + 1], lath[ilat + 1, ilon + 1]],
                            [lonh[ilat, ilon + 1], lath[ilat, ilon + 1]],
                        ]
                    else:
                        gridcell_edges = [
                            [
                                lath[ilat, ilon],
                                lonh[ilat, ilon],
                            ],  # for some reason lat/lon order works
                            [lath[ilat + 1, ilon], lonh[ilat + 1, ilon]],
                            [lath[ilat + 1, ilon + 1], lonh[ilat + 1, ilon + 1]],
                            [lath[ilat, ilon + 1], lonh[ilat, ilon + 1]],
                        ]

                    tmp = self._shape_to_geometry(
                        gridcell_edges, epsg=GridWeightExtractor.CRS_CAEA
                    )
                    grid_cell_geom_gpd_wkt[ilat][ilon] = tmp

        else:
            for ishape in range(self._nlat):
                idx = np.where(self._input_data[self._netcdf_input_field] == ishape)[0]
                if len(idx) == 0:
                    raise ValueError(
                        f"Polygon ID = {ishape} not found in attribute "
                        f"'{self._netcdf_input_field}'. Numbering of this shapefile "
                        f"attribute needs to be [0 ... {self._nlat - 1}]."
                    )
                if len(idx) > 1:
                    raise ValueError(
                        f"Polygon ID = {ishape} found multiple times in attribute "
                        f"'{self._netcdf_input_field}' but needs to be unique."
                    )
                idx = idx[0]
                poly = self._input_data.loc[idx].geometry
                # We add an empty buffer here to fix problems with bad polygon topology
                # (actually caused by ESRI's historical incompetence)
                grid_cell_geom_gpd_wkt[ishape][0] = ogr.CreateGeometryFromWkt(
                    wkt.dumps(poly)
                ).Buffer(0.0)

        return grid_cell_geom_gpd_wkt

    def _create_gridcells_from_centers(self, lat, lon):
        # Create array of edges where (x, y) are always center cells
        nlon = np.shape(lon)[1]
        nlat = np.shape(lat)[0]
        lonh = np.empty((nlat + 1, nlon + 1), dtype=np.float64)
        lath = np.empty((nlat + 1, nlon + 1), dtype=np.float64)
        tmp1 = [
            [(lat[ii + 1, jj + 1] - lat[ii, jj]) / 2 for jj in range(nlon - 1)]
            + [(lat[ii + 1, nlon - 1] - lat[ii, nlon - 2]) / 2]
            for ii in range(nlat - 1)
        ]
        tmp2 = [
            [(lon[ii + 1, jj + 1] - lon[ii, jj]) / 2 for jj in range(nlon - 1)]
            + [(lon[ii + 1, nlon - 1] - lon[ii, nlon - 2]) / 2]
            for ii in range(nlat - 1)
        ]
        dlat = np.array(tmp1 + [tmp1[-1]])
        dlon = np.array(tmp2 + [tmp2[-1]])
        lonh[0:nlat, 0:nlon] = lon - dlon
        lath[0:nlat, 0:nlon] = lat - dlat

        # Make lat and lon one column and row wider such that all cell centers are
        # surrounded by edges
        lonh[nlat, 0:nlon] = lonh[nlat - 1, 0:nlon] + (
            lonh[nlat - 1, 0:nlon] - lonh[nlat - 2, 0:nlon]
        )
        lath[nlat, 0:nlon] = lath[nlat - 1, 0:nlon] + (
            lath[nlat - 1, 0:nlon] - lath[nlat - 2, 0:nlon]
        )
        lonh[0:nlat, nlon] = lonh[0:nlat, nlon - 1] + (
            lonh[0:nlat, nlon - 1] - lonh[0:nlat, nlon - 2]
        )
        lath[0:nlat, nlon] = lath[0:nlat, nlon - 1] + (
            lath[0:nlat, nlon - 1] - lath[0:nlat, nlon - 2]
        )
        lonh[nlat, nlon] = lonh[nlat - 1, nlon - 1] + (
            lonh[nlat - 1, nlon - 1] - lonh[nlat - 2, nlon - 2]
        )
        lath[nlat, nlon] = lath[nlat - 1, nlon - 1] + (
            lath[nlat - 1, nlon - 1] - lath[nlat - 2, nlon - 2]
        )

        return [lath, lonh]

    def _shape_to_geometry(self, shape_from_jsonfile, epsg=None):
        # Convert a shape read from a shapefile to an OGR geometry
        # epsg :: integer EPSG code
        ring_shape = ogr.Geometry(ogr.wkbLinearRing)

        for ii in shape_from_jsonfile:
            ring_shape.AddPoint_2D(ii[0], ii[1])
        # close ring
        ring_shape.AddPoint_2D(shape_from_jsonfile[0][0], shape_from_jsonfile[0][1])

        poly_shape = ogr.Geometry(ogr.wkbPolygon)
        poly_shape.AddGeometry(ring_shape)

        if epsg:
            source = osr.SpatialReference()
            # usual lat/lon projection
            source.ImportFromEPSG(GridWeightExtractor.CRS_LLDEG)

            target = osr.SpatialReference()
            target.ImportFromEPSG(epsg)  # any projection to convert to

            transform = osr.CoordinateTransformation(source, target)
            poly_shape.Transform(transform)

        return poly_shape

    @staticmethod
    def _check_proximity_of_envelops(gridcell_envelop, shape_envelop):
        # Check if two envelopes are in proximity (intersect)
        #
        # minX --> env[0]
        # maxX --> env[1]
        # minY --> env[2]
        # maxY --> env[3]
        return (
            gridcell_envelop[0] <= shape_envelop[1]
            and gridcell_envelop[1] >= shape_envelop[0]
            and gridcell_envelop[2] <= shape_envelop[3]
            and gridcell_envelop[3] >= shape_envelop[2]
        )

    @staticmethod
    def _check_gridcell_in_proximity_of_shape(gridcell_edges, shape_from_jsonfile):
        # Check if a grid cell falls into the bounding box of the shape.
        # This does not mean it intersects, but it is a quick and cheap way to
        # determine cells that might intersect.
        #
        # gridcell_edges = [(lon1, lat1), (lon2, lat2), (lon3, lat3), (lon4, lat4)]
        # shape_from_jsonfile
        min_lat_cell = np.min([ii[1] for ii in gridcell_edges])
        max_lat_cell = np.max([ii[1] for ii in gridcell_edges])
        min_lon_cell = np.min([ii[0] for ii in gridcell_edges])
        max_lon_cell = np.max([ii[0] for ii in gridcell_edges])

        lat_shape = np.array([icoord[1] for icoord in shape_from_jsonfile])  # is it lat???
        lon_shape = np.array([icoord[0] for icoord in shape_from_jsonfile])  # is it lon???

        min_lat_shape = np.min(lat_shape)
        max_lat_shape = np.max(lat_shape)
        min_lon_shape = np.min(lon_shape)
        max_lon_shape = np.max(lon_shape)

        return (
            min_lat_cell <= max_lat_shape
            and max_lat_cell >= min_lat_shape
            and min_lon_cell <= max_lon_shape
            and max_lon_cell >= min_lon_shape
        )

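# Usage sketch (illustrative, not part of the library): compute the area weights mapping
# each NetCDF grid cell onto the sub-basins upstream of a given gauge. File names and the
# gauge ID are placeholders; dim_names/var_names must match the coordinate dimensions and
# variables of the forcing file.
def _example_grid_weights(nc_path="forcing.nc", routing_path="finalcat_hru_info.zip"):
    extractor = GridWeightExtractor(
        input_file_path=nc_path,
        routing_file_path=routing_path,
        dim_names=("lon_dim", "lat_dim"),
        var_names=("longitude", "latitude"),
        gauge_ids=["02LE024"],  # placeholder Obs_NM value
    )
    weights = extractor.extract()
    # weights["data"] is a tuple of (HRU id, cell id, weight) triples; for each HRU the
    # weights are rescaled to sum to 1 unless the area mismatch exceeds the threshold.
    return weights
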
def upstream_from_id(
    fid: int, df: Union[pandas.DataFrame, geopandas.GeoDataFrame]
) -> Union[pandas.DataFrame, geopandas.GeoDataFrame]:
    """Return upstream sub-basins by evaluating the downstream networks.

    Parameters
    ----------
    fid : int
        Feature ID of the downstream feature of interest.
    df : pandas.DataFrame or geopandas.GeoDataFrame
        A GeoDataFrame comprising the watershed attributes.

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Basin IDs including `fid` and its upstream contributors.
    """
    from ravenpy.utilities.geo import determine_upstream_ids

    return determine_upstream_ids(
        fid, df, basin_field="SubId", downstream_field="DowSubId"
    )

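# Usage sketch (illustrative, not part of the library): subset a routing product to the
# network draining into a known sub-basin. The file name and SubId value are placeholders.
def _example_upstream_from_id(routing_path="finalcat_hru_info.zip", outlet_subid=1234):
    df = open_shapefile(routing_path)
    return upstream_from_id(outlet_subid, df)
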
def upstream_from_coords(
    lon: float, lat: float, df: Union[pandas.DataFrame, geopandas.GeoDataFrame]
) -> Union[pandas.DataFrame, geopandas.GeoDataFrame]:
    """Return the sub-basins located upstream from the outlet.

    Parameters
    ----------
    lon : float
        Longitude of the outlet.
    lat : float
        Latitude of the outlet.
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Routing product.

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sub-basins located upstream from the outlet.
    """
    from ravenpy.utilities.geo import find_geometry_from_coord

    # Find the outlet sub-basin ID
    out_sb = find_geometry_from_coord(lon, lat, df)
    out_sb_id = int(out_sb["SubId"])

    # Find upstream sub-basins
    up_ids = upstream_from_id(out_sb_id, df)

    return up_ids

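# Usage sketch (illustrative, not part of the library): the coordinate-based helper locates
# the sub-basin containing the outlet point and then delegates to upstream_from_id. The
# file name and coordinates are placeholders (WGS 84 degrees).
def _example_upstream_from_coords(routing_path="finalcat_hru_info.zip"):
    df = open_shapefile(routing_path)
    return upstream_from_coords(lon=-80.0, lat=45.0, df=df)
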