Module `w4h.mapping`

The Mapping module contains the functions used for geospatial analysis throughout the package. This includes some input/output as well as functions to make manipulatin of geospatial data more simple

Expand source code

"""The Mapping module contains the functions used for geospatial analysis throughout the package.
This includes some input/output as well as functions to make manipulatin of geospatial data more simple
"""

import datetime
import inspect
import json
import pathlib
import os
import warnings

import rioxarray as rxr
import xarray as xr
import geopandas as gpd
import pandas as pd
import numpy as np
import pyproj
import shapely
from owslib.wcs import WebCoverageService
from shapely.geometry import Point
from urllib.request import urlopen
from rasterio import MemoryFile

import w4h

from w4h import logger_function, verbose_print

lidarURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS'

#Read study area shapefile (or other file) into geopandas
def read_study_area(study_area=None, output_crs='EPSG:4269', buffer=None, return_original=False, log=False, verbose=False, **read_file_kwargs):
    """Read study area geospatial file into geopandas

    Parameters
    ----------
    study_area : str, pathlib.Path, geopandas.GeoDataFrame, or shapely.Geometry
        Filepath to any geospatial file readable by geopandas. 
        Polygon is best, but may work with other types if extent is correct.
    study_area_crs : str, tuple, dict, optional
        CRS designation readable by geopandas/pyproj
    buffer : None or numeric, default=None
        If None, no buffer created. If a numeric value is given (float or int, for example), a buffer will be created at that distance in the unit of the study_area_crs.
    return_original : bool, default=False
        Whether to return the (reprojected) study area as well as the (reprojected) buffered study area. Study area is only used for clipping data, so usually return_original=False is sufficient.
    log : bool, default = False
        Whether to log results to log file, by default False
    verbose : bool, default=False
        Whether to print status and results to terminal

    Returns
    -------
    studyAreaIN : geopandas dataframe
        Geopandas dataframe with polygon geometry.
    """
    
    if study_area is None:
        study_area = w4h.get_resources()['study_area']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_study_area, locals())
    
    if isinstance(study_area, (gpd.GeoDataFrame, gpd.GeoSeries)):
        studyAreaIN = study_area
    elif isinstance(study_area, shapely.Geometry):
        if 'crs' in read_file_kwargs:
            crs = read_file_kwargs['crs']
        else:
            warnings.warn('A shapely Geometry object was read in as the study area, but no crs specified. Using CRS specified in output_crs: {output_crs}.\
                          You can specify the crs as a keyword argument in the read_study_area() function')
            crs = output_crs
        studyAreaIN = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[study_area])
    else:
        studyAreaIN = gpd.read_file(study_area, **read_file_kwargs)
    studyAreaIN.to_crs(output_crs, inplace=True)

    # Create a buffered study area for clipping
    if buffer is not None:
        if return_original:
            studyAreaNoBuffer = studyAreaIN
        studyAreaIN = studyAreaIN.buffer(distance=buffer)
        
        if verbose:
            print('\tBuffer applied.')
    
    if verbose:
        print("\tStudy area read.")

    if return_original:
        return studyAreaIN, studyAreaNoBuffer
    return studyAreaIN

#Convert coords in columns to geometry in geopandas dataframe
def coords2geometry(df_no_geometry, xcol='LONGITUDE', ycol='LATITUDE', zcol='ELEV_FT', input_coords_crs='EPSG:4269', use_z=False, wkt_col='WKT', geometry_source='coords', verbose=False, log=False):
    """Adds geometry to points with xy coordinates in the specified coordinate reference system.

    Parameters
    ----------
    df_no_geometry : pandas.Dataframe
        a Pandas dataframe containing points
    xcol : str, default='LONGITUDE'
        Name of column holding x coordinate data in df_no_geometry
    ycol : str, default='LATITUDE'
        Name of column holding y coordinate data in df_no_geometry
    zcol : str, default='ELEV_FT'
        Name of column holding z coordinate data in df_no_geometry
    input_coords_crs : str, default='EPSG:4269
        Name of crs used for geometry
    use_z : bool, default=False
        Whether to use z column in calculation
    geometry_source : str {'coords', 'wkt', 'geometry'}
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        Geopandas dataframe with points and their geometry values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(coords2geometry, locals(), exclude_params=['df_no_geometry'])

    if isinstance(df_no_geometry, gpd.GeoDataFrame):
        gdf = df_no_geometry
    else:
        wktList = ['wkt', 'well known text', 'wellknowntext', 'well_known_text', 'w']
        coords_list = ['coords', 'coordinates', 'xycoords', 'xy', 'xyz', 'xyzcoords', 'xycoordinates', 'xyzcoordinates', 'c']
        geometryList = ['geometry', 'geom', 'geo', 'g']
        if geometry_source.lower() in wktList:
            from shapely import wkt
            df_no_geometry['geometry'] = df_no_geometry[wkt_col].apply(wkt.loads)
            df_no_geometry.drop(wkt_col, axis=1, inplace=True) #Drop WKT column

        elif geometry_source.lower() in coords_list:#coords = pd.concat([y, x], axis=1)
            x = df_no_geometry[xcol].to_numpy()
            y = df_no_geometry[ycol].to_numpy()
            if use_z:
                z = df_no_geometry[zcol].to_numpy()
                df_no_geometry["geometry"] = gpd.points_from_xy(x, y, z=z, crs=input_coords_crs)
            else:
                df_no_geometry["geometry"] = gpd.points_from_xy(x, y, crs=input_coords_crs)
        elif geometry_source.lower() in geometryList:
            pass
        else:
            warnings.warn(message=f"""The parameter geometry_source={geometry_source} is not recognized.
                        Should be one of 'coords' (if x, y (and/or z) columns with coordintes used), 
                        'wkt' (if column with wkt string used), or 
                        'geometry' (if column with shapely geometry objects used, as with a GeoDataFrame)""")
            
        gdf = gpd.GeoDataFrame(df_no_geometry, geometry='geometry', crs=input_coords_crs)
    return gdf

#Clip a geodataframe to a study area
def clip_gdf2study_area(study_area, gdf, log=False, verbose=False):
    """Clips dataframe to only include things within study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Inputs study area polygon
    gdf : geopandas.GeoDataFrame
        Inputs point data
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdfClip : geopandas.GeoDataFrame
        Contains only points within the study area
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(clip_gdf2study_area, locals(), exclude_params=['study_area', 'gdf'])

    if study_area is None:
        return gdf
    else:
        studyArea_proj = study_area.to_crs(gdf.crs).copy()
        gdfClip = gpd.clip(gdf, studyArea_proj) #Easier to project just study area to ensure data fit
        gdfClip.reset_index(inplace=True, drop=True) #Reset index
    
    return gdfClip

#Function to sample raster points to points specified in geodataframe
def sample_raster_points(raster=None, points_df=None, well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False):  
    """Sample raster values to points from geopandas geodataframe.

    Parameters
    ----------
    raster : rioxarray data array
        Raster containing values to be sampled.
    points_df : geopandas.geodataframe
        Geopandas dataframe with geometry column containing point values to sample.
    well_id_col : str, default="API_NUMBER"
        Column that uniquely identifies each well so multiple sampling points are not taken per well
    xcol : str, default='LONGITUDE'
        Column containing name for x-column, by default 'LONGITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
    ycol : str, default='LATITUDE'
        Column containing name for y-column, by default 'LATITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.    new_col : str, optional
    new_col : str, default='SAMPLED'
        Name for name of new column containing points sampled from the raster, by default 'SAMPLED'.
    verbose : bool, default=True
        Whether to send to print() information about progress of function, by default True.
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    points_df : geopandas.geodataframe
        Same as points_df, but with sampled values and potentially with reprojected coordinates.
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if raster is None:
        raster = w4h.get_resources()['surf_elev']
    if points_df is None:
        points_df = w4h.get_resources()['well_data']

    if verbose:
        verbose_print(sample_raster_points, locals(), exclude_params=['raster', 'points_df'])
        print(f"\tSampling {new_col} grid for at all well locations.")

    #Project points to raster CRS
    rastercrsWKT=raster.spatial_ref.crs_wkt
    if rastercrsWKT != points_df.crs:
        if verbose:
            print("\tTemporarily reprojecting raster data to point data's CRS.")
        pointCRS = points_df.crs.to_epsg()
        if pointCRS is None:
            pointCRS = points_df.crs.to_wkt()

        raster = raster.rio.reproject(pointCRS)
        #points_df = points_df.to_crs(rastercrsWKT)

    xCOLOUT = xcol+'_PROJ'
    yCOLOUT = ycol+'_PROJ'
    points_df[xCOLOUT] = points_df['geometry'].x
    points_df[yCOLOUT] = points_df['geometry'].y
    xData = np.array(points_df[xCOLOUT].values)
    yData = np.array(points_df[yCOLOUT].values)
    zData = []
    zID = []
    zInd = []

    # Get unique well values to reduce sampling time
    uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
    if verbose:
        print(f"\t{uniqueWells.shape[0]} unique wells idenfied using {well_id_col} column")
    
    # Loop over DataFrame rows
    for i, row in uniqueWells.iterrows():
        # Select data from DataArray at current coordinates and append to list
        zInd.append(i)
        zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method='nearest').item()])
    
    inputtype = points_df.dtypes[well_id_col]
    wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

    # Merge each unique well's data with all well intervals
    wellZDF[well_id_col].astype(inputtype, copy=False)
    points_df = points_df.merge(wellZDF, how='left', on=well_id_col)
    #points_df[new_col] = zData#sampleDF[new_col]
    return points_df

#Merge xyz dataframe into a metadata dataframe
def xyz_metadata_merge(xyz, metadata, verbose=False, log=False):
    """Add elevation to header data file.

    Parameters
    ----------
    xyz : pandas.Dataframe
        Contains elevation for the points
    metadata : pandas dataframe
        Header data file
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    headerXYZData : pandas.Dataframe
        Header dataset merged to get elevation values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(xyz_metadata_merge, locals(), exclude_params=['xyz'])
    headerXYZData = metadata.merge(xyz, how='left', on='API_NUMBER')
    headerXYZData.drop(['LATITUDE_x', 'LONGITUDE_x'], axis=1, inplace=True)
    headerXYZData.rename({'LATITUDE_y':'LATITUDE', 'LONGITUDE_y':'LONGITUDE'}, axis=1, inplace=True)
    return headerXYZData

#Read wcsservice into rioxarray
def read_wcs(study_area, wcs_url=lidarURL, res_x=30, res_y=30, verbose=False, log=False, **kwargs):
    """Reads a WebCoverageService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containing study area polygon
    wcs_url : str, default=lidarURL
    Represents the url for the WCS
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=30
        Sets resolution for y axis
    log : bool, default = False
        Whether to log results to log file, by default False
    **kwargs

    Returns
    -------
    wcsData_rxr : xarray.DataArray
        A xarray dataarray holding the image from the WebCoverageService
    """
    #Drawn largely from: https://git.wur.nl/isric/soilgrids/soilgrids.notebooks/-/blob/master/01-WCS-basics.ipynb
    
    #30m DEM
    #wcs_url = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_DEM_30M/ImageServer/WCSServer?request=GetCapabilities&service=WCS'
    #lidar url:
    #lidarURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS'

    #studyAreaPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ESL_StudyArea_5mi.shp"
    #study_area = gpd.read_file(studyAreaPath)
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wcs (currently set to {})'.format(study_area))
        return

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(read_wcs, locals(), exclude_params=['study_area'])      

    if 'wcs_url' in kwargs:
        wcs_url = kwargs['wcs_url']
    if 'res_x' in kwargs:
        res_x = kwargs['res_x']
    if 'res_y' in kwargs:
        res_y = kwargs['res_y']
    
    width_in = ''
    height_in= ''

    #Create coverage object
    my_wcs = WebCoverageService(wcs_url, version='1.0.0') 
    #names = [k for k in my_wcs.contents.keys()]
    #print(names)
    dataID = 'IL_Statewide_Lidar_DEM'
    data = my_wcs.contents[dataID]
    dBBox = data.boundingboxes #Is this an error?
    
    study_area = study_area.to_crs(data.boundingboxes[0]['nativeSrs'])
    saBBox = study_area.total_bounds
    
    #In case study area bounding box goes outside data bounding box, use data bounding box values
    newBBox = []
    for i,c in enumerate(dBBox[0]['bbox']):
        if i == 0 or i==2:
            if saBBox[i] < c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)
        else:
            if saBBox[i] > c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)

    #Recalculate resolution if it is too fine to read in
    #Start by getting the area of the study area bounding box
    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]
    saBBoxAreaM = saWidth*saHeight
    saBBoxAreaKM = saBBoxAreaM/(1000*1000) #Area in km^2

    if saBBoxAreaM/(res_x*res_y) > (4100*15000)*0.457194: #What I think might be the max download size?
        print("Resolution inputs overriden, file request too large.")
        res_x=str(round(saWidth/2500, 2))

        width_in  = str(int(saWidth/float(res_x )))
        height_in = str(int(saHeight/float(res_x)))
        
        res_y=str(round(saHeight/height_in, 2))

        print('New resolution is: '+res_x+'m_x X '+res_y+'m_y' )
        print('Dataset size: '+width_in+' pixels_x X '+height_in+' pixels_y')

    bBox = tuple(newBBox)
    bBox_str = str(tuple(newBBox)[1:-1]).replace(' ','')
    dataCRS = 'EPSG:3857'

    #Format WCS request using owslib
    response = my_wcs.getCoverage(
        identifier=my_wcs[dataID].id, 
        crs=dataCRS,#'urn:ogc:def:crs:EPSG::26716',
        bbox=bBox,
        resx=res_x, 
        resy=res_y,
        timeout=60,
        #width = width_in, height=height_in,
        format='GeoTIFF')
    response

    #If I can figure out url, this might be better?
    #baseURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/'+dataID+'/ImageServer/WCSServer'
    #addonRequestURL = '?request=GetCoverage&service=WCS&bbox='+bBox_str+'&srs='+dataCRS+'&format=GeoTIFF'+'&WIDTH='+width_in+'&HEIGHT='+height_in+')'
    #reqURL = baseURL+addonRequestURL
    #wcsData_rxr =  rxr.open_rasterio(reqURL)

    with MemoryFile(response) as memfile:
        with memfile.open() as dataset:
            wcsData_rxr =  rxr.open_rasterio(dataset)

    return wcsData_rxr

#Read wms service into rioxarray
def read_wms(study_area, layer_name='IL_Statewide_Lidar_DEM_WGS:None', wms_url=lidarURL, srs='EPSG:3857', 
             clip_to_studyarea=True, bbox=[-9889002.615500,5134541.069716,-9737541.607038,5239029.627400],
             res_x=30, res_y=30, size_x=512, size_y=512, 
             format='image/tiff', verbose=False, log=False, **kwargs):
    """
    Reads a WebMapService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containg study area polygon
    layer_name : str, default='IL_Statewide_Lidar_DEM_WGS:None'
        Represents the layer name in the WMS
    wms_url : str, default=lidarURL
        Represents the url for the WMS
    srs : str, default='EPSG:3857'
        Sets the srs
    clip_to_studyarea : bool, default=True
        Whether to clip to study area or not
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=512
        Sets resolution for y axis
    size_x : int, default=512
        Sets width of result
    size_y : int, default=512
        Sets height of result
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    wmsData_rxr : xarray.DataArray
        Holds the image from the WebMapService
    """
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wms (currently set to {})'.format(study_area))
        return
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_wms, locals(), exclude_params=['study_area'])
    from owslib.wms import WebMapService
    # Define WMS endpoint URL
    if 'wms_url' in kwargs:
        wms_url = kwargs['wms_url']
    else:
        wms_url = wms_url

    # Create WMS connection object
    wms = WebMapService(wms_url)
    # Print available layers
    #print(wms.contents)

    # Select desired layer
    if 'layer_name' in kwargs:
        layer = kwargs['layer_name']
    else:
        layer = layer_name
    
    data = wms.contents#[layer]
    if 'srs' in kwargs:
        studyArea_proj = study_area.to_crs(kwargs['srs'])
        saBBox = studyArea_proj.total_bounds
    else:
        studyArea_proj = study_area.to_crs(srs)
    
    saBBox = studyArea_proj.total_bounds

    if layer == 'IL_Statewide_Lidar_DEM_WGS:None':
        dBBox = data['0'].boundingBox #Is this an error?

        gpdDict = {'Label': ['Surf Data Box'], 'geometry': [shapely.geometry.Polygon(((dBBox[0], dBBox[1]), (dBBox[0], dBBox[3]), (dBBox[2], dBBox[3]), (dBBox[2], dBBox[1]), (dBBox[0], dBBox[1])))]}
        dBBoxGDF = gpd.GeoDataFrame(gpdDict, crs=dBBox[4])
        dBBoxGDF.to_crs(srs)

        #In case study area bounding box goes outside data bounding box, use data bounding box values
        newBBox = []
        for i,c in enumerate(dBBox):
            if type(c) is str:
                pass
            elif i == 0 or i==2:
                if saBBox[i] < c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)
            else:
                if saBBox[i] > c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)

    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]    
    # Check kwargs for rest of parameters
    if 'size_x' in kwargs:
        size_x = kwargs['size_x']
    if 'size_y' in kwargs:
        size_y = kwargs['size_y']
    if 'format' in kwargs:
        format = kwargs['format']
    if 'clip_to_studyarea' in kwargs:
        clip_to_studyarea = kwargs['clip_to_studyarea']
   
    #get the wms
    if clip_to_studyarea:
        img = wms.getmap(layers=[layer], srs=srs, bbox=saBBox, size=(size_x, size_y), format=format, transparent=True, timeout=60)        
    else:
        img = wms.getmap(layers=[layer], srs=srs, bbox=bbox, size=(size_x, size_y), format=format, transparent=True, timeout=60)

    #Save wms in memory to a raster dataset
    with MemoryFile(img) as memfile:
        with memfile.open() as dataset:
            wmsData_rxr = rxr.open_rasterio(dataset)

    #if clip_to_studyarea:
    #    wmsData_rxr = wmsData_rxr.sel(x=slice(saBBox[0], saBBox[2]), y=slice(saBBox[3], saBBox[1]))#.sel(band=1)

    return wmsData_rxr

#Clip a grid to a study area
def grid2study_area(study_area, grid, output_crs='EPSG:4269',verbose=False, log=False):
    """Clips grid to study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        inputs study area polygon
    grid : xarray.DataArray
        inputs grid array
    output_crs : str, default='EPSG:4269'
        inputs the coordinate reference system for the study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    grid : xarray.DataArray
        returns xarray containing grid clipped only to area within study area

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(grid2study_area, locals(), exclude_params=['study_area', 'grid'])
    
    if output_crs=='':
        output_crs=study_area.crs

    #Get input CRS's
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())
    study_area_crs = study_area.crs
    
    # Reproject if needed
    if output_crs != grid_crs:
        grid = grid.rio.reproject(output_crs)
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())

    if grid_crs != study_area_crs:
        study_area = study_area.to_crs(grid_crs)   

    # We'll just clip to outer bounds of study area
    saExtent = study_area.total_bounds

    if grid['y'][-1].values - grid['y'][0].values > 0:
        miny=saExtent[1]
        maxy=saExtent[3]
    else:
        miny=saExtent[3]
        maxy=saExtent[1]        
        
    if grid['x'][-1].values - grid['x'][0].values > 0:
        minx=saExtent[0]
        maxx=saExtent[2]
    else:
        minx=saExtent[2]
        maxx=saExtent[0]
    
    # "Clip" it
    grid = grid.sel(x=slice(minx, maxx), y=slice(miny, maxy))
    if 'band' in grid.dims:
        grid = grid.sel(band=1)

    return grid

#Read the model grid into (rio)xarray
def read_model_grid(model_grid_path, study_area=None, no_data_val_grid=0, read_grid=True, node_byspace=True, grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False):
    """Reads in model grid to xarray data array

    Parameters
    ----------
    grid_path : str
        Path to model grid file
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    no_data_val_grid : int, default=0
        value assigned to areas with no data
    readGrid : bool, default=True
        Whether function to either read grid or create grid
    node_byspace : bool, default=False
        Denotes how to create grid
    output_crs : str, default='EPSG:4269'
        Inputs study area crs
    grid_crs : str, default=None
        Inputs grid crs
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    modelGrid : xarray.DataArray
        Data array containing model grid
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_model_grid, locals(), exclude_params='study_area')
    
    if read_grid and model_grid_path is not None:
        modelGridIN = rxr.open_rasterio(model_grid_path)

        if grid_crs is None:
            try:
                grid_crs=modelGridIN.spatial_ref.crs_wkt
            except Exception:
                iswsCRSPath = w4h.get_resources()['ISWS_CRS']
                iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)
                grid_crs = iswsCRS              
                modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, str) and grid_crs.lower()=='isws':
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)            
            grid_crs = iswsCRS              
            modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, pathlib.PurePath) or (isinstance(grid_crs, str) and pathlib.Path(grid_crs).exists()):
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            grid_crs = w4h.read_dict(iswsCRSPath, keytype=None)            
            modelGridIN.rio.write_crs(grid_crs)
        else:
            warnings.warn(f'CRS Specification for grid is {grid_crs}, but this cannot be written to the grid')
        
        modelGridIN = modelGridIN.rio.reproject(output_crs) 
           
        if study_area is not None:                
            study_area = study_area.to_crs(output_crs)
            modelGrid = grid2study_area(study_area=study_area, grid=modelGridIN, output_crs=output_crs)
        else:
            modelGrid = modelGridIN

        try:
            noDataVal = float(modelGrid.attrs['_FillValue']) #Extract from dataset itsel
        except:
            noDataVal = no_data_val_grid

        modelGrid = modelGrid.where(modelGrid != noDataVal, other=np.nan)   #Replace no data values with NaNs
        modelGrid = modelGrid.where(modelGrid == np.nan, other=1) #Replace all other values with 1
        modelGrid.rio.reproject(grid_crs, inplace=True)     
    elif model_grid_path is None and study_area is None:
        if verbose:
            print("ERROR: Either model_grid_path or study_area must be defined.")
    else:
        spatRefDict = w4h.read_dict(iswsCRSPath, keytype=None)            

        saExtent = study_area.total_bounds

        startX = saExtent[0] #Starting X Coordinate
        startY = saExtent[1] #starting Y Coordinate
        
        endX = saExtent[2]
        endY = saExtent[3]
        
        if node_byspace:
            xSpacing = 625 #X Node spacing 
            ySpacing = xSpacing #Y Node spacing  
            
            x = np.arange(startX, endX, xSpacing)
            y = np.arange(startY, endY, ySpacing)
        else:
            xNodes = 100 #Number of X Nodes
            yNodes = 100 #Number of Y Nodes

            x = np.linspace(startX, endX, num=xNodes)
            y = np.linspace(startY, endY, num=yNodes)        
        
        xx, yy = np.meshgrid(x, y)
        zz = np.ones_like(xx).transpose()

        yIn = np.flipud(y)

        coords = {'x':x,'y':yIn, 'spatial_ref':0}
        dims = {'x':x,'y':yIn}
        
        modelGrid = xr.DataArray(data=zz,coords=coords,attrs={'_FillValue':no_data_val_grid}, dims=dims)
        modelGrid.spatial_ref.attrs['spatial_ref'] = {}
        if grid_crs is None or grid_crs=='isws' or grid_crs=='ISWS':
            for k in spatRefDict:
                modelGrid.spatial_ref.attrs[k] = spatRefDict[k]
    return modelGrid

#Read a grid from a file in using rioxarray
def read_grid(grid_path=None, grid_type='model', no_data_val_grid=0, use_service=False, study_area=None,  grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False, **kwargs):
    """Reads in grid

    Parameters
    ----------
    grid_path : str or pathlib.Path, default=None
        Path to a grid file
    grid_type : str, default='model'
        Sets what type of grid to load in
    no_data_val_grid : int, default=0
        Sets the no data value of the grid
    use_service : str, default=False
        Sets which service the function uses
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    grid_crs : str, default=None
        Sets crs to use if clipping to study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gridIN : xarray.DataArray
        Returns grid
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_grid, locals(), exclude_params=['study_area'])
        
    if grid_type=='model':
        if 'read_grid' in list(kwargs.keys()):
            rgrid = kwargs['read_grid']
        else:
            rgrid=True
        gridIN = read_model_grid(model_grid_path=grid_path, study_area=study_area,  no_data_val_grid=0, read_grid=rgrid, grid_crs=grid_crs, output_crs=output_crs, verbose=verbose)
    else:
        if use_service==False:
            gridIN = rxr.open_rasterio(grid_path)
        elif use_service.lower()=='wcs':
            gridIN = read_wcs(study_area, wcs_url=lidarURL, **kwargs)
        elif use_service.lower()=='wms':
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        elif use_service:
            #Deafults to wms
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        
        if study_area is not None:
            study_area_crs = study_area.crs
            if grid_crs is None:
                try:
                    grid_crs = pyproj.CRS.from_wkt(gridIN.rio.crs.to_wkt())
                except Exception:
                    iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                    gridIN.rio.write_crs(iswsCRS)
            elif grid_crs.lower()=='isws':
                iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                gridIN.rio.write_crs(iswsCRS)
                        
            if study_area_crs is None:
                study_area_crs=study_area.crs
            study_area = study_area.to_crs(output_crs)
            study_area_crs=study_area.crs
            
            gridIN = gridIN.rio.reproject(output_crs)
            gridIN = grid2study_area(study_area=study_area, grid=gridIN, output_crs=output_crs,)
        else:
            gridIN = gridIN.rio.reproject(output_crs)
            if 'band' in gridIN.dims:
                gridIN = gridIN.sel(band=1)

        try:
            no_data_val_grid = gridIN.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass
                
        gridIN = gridIN.where(gridIN != no_data_val_grid, other=np.nan)  #Replace no data values with NaNs

    return gridIN

#Align and coregister rasters
def align_rasters(grids_unaligned=None, model_grid=None,
                  no_data_val_grid=0, verbose=False, log=False):
    """Reprojects two rasters and aligns their pixels

    Parameters
    ----------
    grids_unaligned : list or xarray.DataArray
        Contains a list of grids or one unaligned grid
    model_grid : xarray.DataArray
        Contains model grid
    no_data_val_grid : int, default=0
        Sets value of no data pixels
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    alignedGrids : list or xarray.DataArray
        Contains aligned grids
    """
    
    if grids_unaligned is None:
        grids_unaligned = [w4h.get_resources()['surf_elev'], w4h.get_resources()['bedrock_elev']]
    if model_grid is None:
        model_grid = w4h.get_resources()['model_grid']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(align_rasters, locals(), exclude_params=['grids_unaligned', 'model_grid'])
    if isinstance(grids_unaligned, (tuple, list)):
        alignedGrids = []
        for g in grids_unaligned:
            alignedGrid = g.rio.reproject_match(model_grid)

            try:
                no_data_val_grid = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
            except:
                pass
            alignedGrid = alignedGrid.where(alignedGrid != no_data_val_grid)  #Replace no data values with NaNs
            alignedGrids.append(alignedGrid)
    else:
        alignedGrid = grids_unaligned.rio.reproject_match(model_grid)

        try:
            noDataVal = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass

        alignedGrids = alignedGrid.where(alignedGrid != noDataVal, other=np.nan)  #Replace no data values with NaNs
        
    return alignedGrids

#Get drift and layer thickness, given a surface and bedrock grid
def get_drift_thick(surface_elev=None, bedrock_elev=None, layers=9, plot=False, verbose=False, log=False):
    """Finds the distance from surface_elev to bedrock_elev and then divides by number of layers to get layer thickness.

    Parameters
    ----------
    surface_elev : rioxarray.DataArray
        array holding surface elevation
    bedrock_elev : rioxarray.DataArray
        array holding bedrock elevation
    layers : int, default=9
        number of layers needed to calculate thickness for
    plot : bool, default=False
        tells function to either plot the data or not

    Returns
    -------
    driftThick : rioxarray.DataArray
        Contains data array containing depth to bedrock at each point
    layerThick : rioxarray.DataArray
        Contains data array with layer thickness at each point

    """
    
    if surface_elev is None:
        surface_elev = w4h.get_resources()['surf_elev']
    if bedrock_elev is None:
        bedrock_elev = w4h.get_resources()['bedrock_elev']
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(get_drift_thick, locals(), exclude_params=['surface_elev', 'bedrock_elev'])
    xr.set_options(keep_attrs=True)

    driftThick = surface_elev - bedrock_elev
    driftThick = driftThick.clip(0,max=5000,keep_attrs=True)
    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        maxThick = np.nanmax(driftThick)
        if maxThick > 550:
            maxThick = 550
        dtPlot = driftThick.plot(vmin=0, vmax=maxThick, ax=ax)
        ax.set_title("Drift Thickness")
    try:
        noDataVal = driftThick.attrs['_FillValue'] #Extract from dataset itself
    except:
        noDataVal = 100001
    
    driftThick = driftThick.where(driftThick <100000, other=np.nan)  #Replace no data values with NaNs
    driftThick = driftThick.where(driftThick >-100000, other=np.nan)  #Replace no data values with NaNs

    layerThick = driftThick/layers
    
    xr.set_options(keep_attrs='default')

    return driftThick, layerThick

Functions

def align_rasters(grids_unaligned=None, model_grid=None, no_data_val_grid=0, verbose=False, log=False)

Reprojects two rasters and aligns their pixels

Parameters

grids_unaligned : list or xarray.DataArray: Contains a list of grids or one unaligned grid
model_grid : xarray.DataArray: Contains model grid
no_data_val_grid : int, default=0: Sets value of no data pixels
log : bool, default = False: Whether to log results to log file, by default False

Returns

alignedGrids : list or xarray.DataArray: Contains aligned grids

Expand source code

def align_rasters(grids_unaligned=None, model_grid=None,
                  no_data_val_grid=0, verbose=False, log=False):
    """Reprojects two rasters and aligns their pixels

    Parameters
    ----------
    grids_unaligned : list or xarray.DataArray
        Contains a list of grids or one unaligned grid
    model_grid : xarray.DataArray
        Contains model grid
    no_data_val_grid : int, default=0
        Sets value of no data pixels
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    alignedGrids : list or xarray.DataArray
        Contains aligned grids
    """
    
    if grids_unaligned is None:
        grids_unaligned = [w4h.get_resources()['surf_elev'], w4h.get_resources()['bedrock_elev']]
    if model_grid is None:
        model_grid = w4h.get_resources()['model_grid']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(align_rasters, locals(), exclude_params=['grids_unaligned', 'model_grid'])
    if isinstance(grids_unaligned, (tuple, list)):
        alignedGrids = []
        for g in grids_unaligned:
            alignedGrid = g.rio.reproject_match(model_grid)

            try:
                no_data_val_grid = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
            except:
                pass
            alignedGrid = alignedGrid.where(alignedGrid != no_data_val_grid)  #Replace no data values with NaNs
            alignedGrids.append(alignedGrid)
    else:
        alignedGrid = grids_unaligned.rio.reproject_match(model_grid)

        try:
            noDataVal = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass

        alignedGrids = alignedGrid.where(alignedGrid != noDataVal, other=np.nan)  #Replace no data values with NaNs
        
    return alignedGrids

def clip_gdf2study_area(study_area, gdf, log=False, verbose=False)

Clips dataframe to only include things within study area.

Parameters

study_area : geopandas.GeoDataFrame: Inputs study area polygon
gdf : geopandas.GeoDataFrame: Inputs point data
log : bool, default = False: Whether to log results to log file, by default False

Returns

gdfClip : geopandas.GeoDataFrame: Contains only points within the study area

Expand source code

def clip_gdf2study_area(study_area, gdf, log=False, verbose=False):
    """Clips dataframe to only include things within study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Inputs study area polygon
    gdf : geopandas.GeoDataFrame
        Inputs point data
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdfClip : geopandas.GeoDataFrame
        Contains only points within the study area
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(clip_gdf2study_area, locals(), exclude_params=['study_area', 'gdf'])

    if study_area is None:
        return gdf
    else:
        studyArea_proj = study_area.to_crs(gdf.crs).copy()
        gdfClip = gpd.clip(gdf, studyArea_proj) #Easier to project just study area to ensure data fit
        gdfClip.reset_index(inplace=True, drop=True) #Reset index
    
    return gdfClip

def coords2geometry(df_no_geometry, xcol='LONGITUDE', ycol='LATITUDE', zcol='ELEV_FT', input_coords_crs='EPSG:4269', use_z=False, wkt_col='WKT', geometry_source='coords', verbose=False, log=False)

Adds geometry to points with xy coordinates in the specified coordinate reference system.

Parameters

df_no_geometry : pandas.Dataframe: a Pandas dataframe containing points
xcol : str, default='LONGITUDE': Name of column holding x coordinate data in df_no_geometry
ycol : str, default='LATITUDE': Name of column holding y coordinate data in df_no_geometry
zcol : str, default='ELEV_FT': Name of column holding z coordinate data in df_no_geometry
input_coords_crs : str, default='EPSG:4269: Name of crs used for geometry
use_z : bool, default=False: Whether to use z column in calculation
geometry_source : str {'coords', 'wkt', 'geometry'}
log : bool, default = False: Whether to log results to log file, by default False

Returns

gdf : geopandas.GeoDataFrame: Geopandas dataframe with points and their geometry values

Expand source code

def coords2geometry(df_no_geometry, xcol='LONGITUDE', ycol='LATITUDE', zcol='ELEV_FT', input_coords_crs='EPSG:4269', use_z=False, wkt_col='WKT', geometry_source='coords', verbose=False, log=False):
    """Adds geometry to points with xy coordinates in the specified coordinate reference system.

    Parameters
    ----------
    df_no_geometry : pandas.Dataframe
        a Pandas dataframe containing points
    xcol : str, default='LONGITUDE'
        Name of column holding x coordinate data in df_no_geometry
    ycol : str, default='LATITUDE'
        Name of column holding y coordinate data in df_no_geometry
    zcol : str, default='ELEV_FT'
        Name of column holding z coordinate data in df_no_geometry
    input_coords_crs : str, default='EPSG:4269
        Name of crs used for geometry
    use_z : bool, default=False
        Whether to use z column in calculation
    geometry_source : str {'coords', 'wkt', 'geometry'}
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        Geopandas dataframe with points and their geometry values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(coords2geometry, locals(), exclude_params=['df_no_geometry'])

    if isinstance(df_no_geometry, gpd.GeoDataFrame):
        gdf = df_no_geometry
    else:
        wktList = ['wkt', 'well known text', 'wellknowntext', 'well_known_text', 'w']
        coords_list = ['coords', 'coordinates', 'xycoords', 'xy', 'xyz', 'xyzcoords', 'xycoordinates', 'xyzcoordinates', 'c']
        geometryList = ['geometry', 'geom', 'geo', 'g']
        if geometry_source.lower() in wktList:
            from shapely import wkt
            df_no_geometry['geometry'] = df_no_geometry[wkt_col].apply(wkt.loads)
            df_no_geometry.drop(wkt_col, axis=1, inplace=True) #Drop WKT column

        elif geometry_source.lower() in coords_list:#coords = pd.concat([y, x], axis=1)
            x = df_no_geometry[xcol].to_numpy()
            y = df_no_geometry[ycol].to_numpy()
            if use_z:
                z = df_no_geometry[zcol].to_numpy()
                df_no_geometry["geometry"] = gpd.points_from_xy(x, y, z=z, crs=input_coords_crs)
            else:
                df_no_geometry["geometry"] = gpd.points_from_xy(x, y, crs=input_coords_crs)
        elif geometry_source.lower() in geometryList:
            pass
        else:
            warnings.warn(message=f"""The parameter geometry_source={geometry_source} is not recognized.
                        Should be one of 'coords' (if x, y (and/or z) columns with coordintes used), 
                        'wkt' (if column with wkt string used), or 
                        'geometry' (if column with shapely geometry objects used, as with a GeoDataFrame)""")
            
        gdf = gpd.GeoDataFrame(df_no_geometry, geometry='geometry', crs=input_coords_crs)
    return gdf

def get_drift_thick(surface_elev=None, bedrock_elev=None, layers=9, plot=False, verbose=False, log=False)

Finds the distance from surface_elev to bedrock_elev and then divides by number of layers to get layer thickness.

Parameters

surface_elev : rioxarray.DataArray: array holding surface elevation
bedrock_elev : rioxarray.DataArray: array holding bedrock elevation
layers : int, default=9: number of layers needed to calculate thickness for
plot : bool, default=False: tells function to either plot the data or not

Returns

driftThick : rioxarray.DataArray: Contains data array containing depth to bedrock at each point
layerThick : rioxarray.DataArray: Contains data array with layer thickness at each point

Expand source code

def get_drift_thick(surface_elev=None, bedrock_elev=None, layers=9, plot=False, verbose=False, log=False):
    """Finds the distance from surface_elev to bedrock_elev and then divides by number of layers to get layer thickness.

    Parameters
    ----------
    surface_elev : rioxarray.DataArray
        array holding surface elevation
    bedrock_elev : rioxarray.DataArray
        array holding bedrock elevation
    layers : int, default=9
        number of layers needed to calculate thickness for
    plot : bool, default=False
        tells function to either plot the data or not

    Returns
    -------
    driftThick : rioxarray.DataArray
        Contains data array containing depth to bedrock at each point
    layerThick : rioxarray.DataArray
        Contains data array with layer thickness at each point

    """
    
    if surface_elev is None:
        surface_elev = w4h.get_resources()['surf_elev']
    if bedrock_elev is None:
        bedrock_elev = w4h.get_resources()['bedrock_elev']
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(get_drift_thick, locals(), exclude_params=['surface_elev', 'bedrock_elev'])
    xr.set_options(keep_attrs=True)

    driftThick = surface_elev - bedrock_elev
    driftThick = driftThick.clip(0,max=5000,keep_attrs=True)
    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        maxThick = np.nanmax(driftThick)
        if maxThick > 550:
            maxThick = 550
        dtPlot = driftThick.plot(vmin=0, vmax=maxThick, ax=ax)
        ax.set_title("Drift Thickness")
    try:
        noDataVal = driftThick.attrs['_FillValue'] #Extract from dataset itself
    except:
        noDataVal = 100001
    
    driftThick = driftThick.where(driftThick <100000, other=np.nan)  #Replace no data values with NaNs
    driftThick = driftThick.where(driftThick >-100000, other=np.nan)  #Replace no data values with NaNs

    layerThick = driftThick/layers
    
    xr.set_options(keep_attrs='default')

    return driftThick, layerThick

def grid2study_area(study_area, grid, output_crs='EPSG:4269', verbose=False, log=False)

Clips grid to study area.

Parameters

study_area : geopandas.GeoDataFrame: inputs study area polygon
grid : xarray.DataArray: inputs grid array
output_crs : str, default='EPSG:4269': inputs the coordinate reference system for the study area
log : bool, default = False: Whether to log results to log file, by default False

Returns

grid : xarray.DataArray: returns xarray containing grid clipped only to area within study area

Expand source code

def grid2study_area(study_area, grid, output_crs='EPSG:4269',verbose=False, log=False):
    """Clips grid to study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        inputs study area polygon
    grid : xarray.DataArray
        inputs grid array
    output_crs : str, default='EPSG:4269'
        inputs the coordinate reference system for the study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    grid : xarray.DataArray
        returns xarray containing grid clipped only to area within study area

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(grid2study_area, locals(), exclude_params=['study_area', 'grid'])
    
    if output_crs=='':
        output_crs=study_area.crs

    #Get input CRS's
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())
    study_area_crs = study_area.crs
    
    # Reproject if needed
    if output_crs != grid_crs:
        grid = grid.rio.reproject(output_crs)
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())

    if grid_crs != study_area_crs:
        study_area = study_area.to_crs(grid_crs)   

    # We'll just clip to outer bounds of study area
    saExtent = study_area.total_bounds

    if grid['y'][-1].values - grid['y'][0].values > 0:
        miny=saExtent[1]
        maxy=saExtent[3]
    else:
        miny=saExtent[3]
        maxy=saExtent[1]        
        
    if grid['x'][-1].values - grid['x'][0].values > 0:
        minx=saExtent[0]
        maxx=saExtent[2]
    else:
        minx=saExtent[2]
        maxx=saExtent[0]
    
    # "Clip" it
    grid = grid.sel(x=slice(minx, maxx), y=slice(miny, maxy))
    if 'band' in grid.dims:
        grid = grid.sel(band=1)

    return grid

def read_grid(grid_path=None, grid_type='model', no_data_val_grid=0, use_service=False, study_area=None, grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False, **kwargs)

Reads in grid

Parameters

grid_path : str or pathlib.Path, default=None: Path to a grid file
grid_type : str, default='model': Sets what type of grid to load in
no_data_val_grid : int, default=0: Sets the no data value of the grid
use_service : str, default=False: Sets which service the function uses
study_area : geopandas.GeoDataFrame, default=None: Dataframe containing study area polygon
grid_crs : str, default=None: Sets crs to use if clipping to study area
log : bool, default = False: Whether to log results to log file, by default False

Returns

gridIN : xarray.DataArray: Returns grid

Expand source code

def read_grid(grid_path=None, grid_type='model', no_data_val_grid=0, use_service=False, study_area=None,  grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False, **kwargs):
    """Reads in grid

    Parameters
    ----------
    grid_path : str or pathlib.Path, default=None
        Path to a grid file
    grid_type : str, default='model'
        Sets what type of grid to load in
    no_data_val_grid : int, default=0
        Sets the no data value of the grid
    use_service : str, default=False
        Sets which service the function uses
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    grid_crs : str, default=None
        Sets crs to use if clipping to study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gridIN : xarray.DataArray
        Returns grid
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_grid, locals(), exclude_params=['study_area'])
        
    if grid_type=='model':
        if 'read_grid' in list(kwargs.keys()):
            rgrid = kwargs['read_grid']
        else:
            rgrid=True
        gridIN = read_model_grid(model_grid_path=grid_path, study_area=study_area,  no_data_val_grid=0, read_grid=rgrid, grid_crs=grid_crs, output_crs=output_crs, verbose=verbose)
    else:
        if use_service==False:
            gridIN = rxr.open_rasterio(grid_path)
        elif use_service.lower()=='wcs':
            gridIN = read_wcs(study_area, wcs_url=lidarURL, **kwargs)
        elif use_service.lower()=='wms':
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        elif use_service:
            #Deafults to wms
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        
        if study_area is not None:
            study_area_crs = study_area.crs
            if grid_crs is None:
                try:
                    grid_crs = pyproj.CRS.from_wkt(gridIN.rio.crs.to_wkt())
                except Exception:
                    iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                    gridIN.rio.write_crs(iswsCRS)
            elif grid_crs.lower()=='isws':
                iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                gridIN.rio.write_crs(iswsCRS)
                        
            if study_area_crs is None:
                study_area_crs=study_area.crs
            study_area = study_area.to_crs(output_crs)
            study_area_crs=study_area.crs
            
            gridIN = gridIN.rio.reproject(output_crs)
            gridIN = grid2study_area(study_area=study_area, grid=gridIN, output_crs=output_crs,)
        else:
            gridIN = gridIN.rio.reproject(output_crs)
            if 'band' in gridIN.dims:
                gridIN = gridIN.sel(band=1)

        try:
            no_data_val_grid = gridIN.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass
                
        gridIN = gridIN.where(gridIN != no_data_val_grid, other=np.nan)  #Replace no data values with NaNs

    return gridIN

def read_model_grid(model_grid_path, study_area=None, no_data_val_grid=0, read_grid=True, node_byspace=True, grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False)

Reads in model grid to xarray data array

Parameters

grid_path : str: Path to model grid file
study_area : geopandas.GeoDataFrame, default=None: Dataframe containing study area polygon
no_data_val_grid : int, default=0: value assigned to areas with no data
readGrid : bool, default=True: Whether function to either read grid or create grid
node_byspace : bool, default=False: Denotes how to create grid
output_crs : str, default='EPSG:4269': Inputs study area crs
grid_crs : str, default=None: Inputs grid crs
log : bool, default = False: Whether to log results to log file, by default False

Returns

modelGrid : xarray.DataArray: Data array containing model grid

Expand source code

def read_model_grid(model_grid_path, study_area=None, no_data_val_grid=0, read_grid=True, node_byspace=True, grid_crs=None, output_crs='EPSG:4269', verbose=False, log=False):
    """Reads in model grid to xarray data array

    Parameters
    ----------
    grid_path : str
        Path to model grid file
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    no_data_val_grid : int, default=0
        value assigned to areas with no data
    readGrid : bool, default=True
        Whether function to either read grid or create grid
    node_byspace : bool, default=False
        Denotes how to create grid
    output_crs : str, default='EPSG:4269'
        Inputs study area crs
    grid_crs : str, default=None
        Inputs grid crs
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    modelGrid : xarray.DataArray
        Data array containing model grid
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_model_grid, locals(), exclude_params='study_area')
    
    if read_grid and model_grid_path is not None:
        modelGridIN = rxr.open_rasterio(model_grid_path)

        if grid_crs is None:
            try:
                grid_crs=modelGridIN.spatial_ref.crs_wkt
            except Exception:
                iswsCRSPath = w4h.get_resources()['ISWS_CRS']
                iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)
                grid_crs = iswsCRS              
                modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, str) and grid_crs.lower()=='isws':
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)            
            grid_crs = iswsCRS              
            modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, pathlib.PurePath) or (isinstance(grid_crs, str) and pathlib.Path(grid_crs).exists()):
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            grid_crs = w4h.read_dict(iswsCRSPath, keytype=None)            
            modelGridIN.rio.write_crs(grid_crs)
        else:
            warnings.warn(f'CRS Specification for grid is {grid_crs}, but this cannot be written to the grid')
        
        modelGridIN = modelGridIN.rio.reproject(output_crs) 
           
        if study_area is not None:                
            study_area = study_area.to_crs(output_crs)
            modelGrid = grid2study_area(study_area=study_area, grid=modelGridIN, output_crs=output_crs)
        else:
            modelGrid = modelGridIN

        try:
            noDataVal = float(modelGrid.attrs['_FillValue']) #Extract from dataset itsel
        except:
            noDataVal = no_data_val_grid

        modelGrid = modelGrid.where(modelGrid != noDataVal, other=np.nan)   #Replace no data values with NaNs
        modelGrid = modelGrid.where(modelGrid == np.nan, other=1) #Replace all other values with 1
        modelGrid.rio.reproject(grid_crs, inplace=True)     
    elif model_grid_path is None and study_area is None:
        if verbose:
            print("ERROR: Either model_grid_path or study_area must be defined.")
    else:
        spatRefDict = w4h.read_dict(iswsCRSPath, keytype=None)            

        saExtent = study_area.total_bounds

        startX = saExtent[0] #Starting X Coordinate
        startY = saExtent[1] #starting Y Coordinate
        
        endX = saExtent[2]
        endY = saExtent[3]
        
        if node_byspace:
            xSpacing = 625 #X Node spacing 
            ySpacing = xSpacing #Y Node spacing  
            
            x = np.arange(startX, endX, xSpacing)
            y = np.arange(startY, endY, ySpacing)
        else:
            xNodes = 100 #Number of X Nodes
            yNodes = 100 #Number of Y Nodes

            x = np.linspace(startX, endX, num=xNodes)
            y = np.linspace(startY, endY, num=yNodes)        
        
        xx, yy = np.meshgrid(x, y)
        zz = np.ones_like(xx).transpose()

        yIn = np.flipud(y)

        coords = {'x':x,'y':yIn, 'spatial_ref':0}
        dims = {'x':x,'y':yIn}
        
        modelGrid = xr.DataArray(data=zz,coords=coords,attrs={'_FillValue':no_data_val_grid}, dims=dims)
        modelGrid.spatial_ref.attrs['spatial_ref'] = {}
        if grid_crs is None or grid_crs=='isws' or grid_crs=='ISWS':
            for k in spatRefDict:
                modelGrid.spatial_ref.attrs[k] = spatRefDict[k]
    return modelGrid

def read_study_area(study_area=None, output_crs='EPSG:4269', buffer=None, return_original=False, log=False, verbose=False, **read_file_kwargs)

Read study area geospatial file into geopandas

Parameters

study_area : str, pathlib.Path, geopandas.GeoDataFrame, or shapely.Geometry: Filepath to any geospatial file readable by geopandas. Polygon is best, but may work with other types if extent is correct.
study_area_crs : str, tuple, dict, optional: CRS designation readable by geopandas/pyproj
buffer : None or numeric, default=None: If None, no buffer created. If a numeric value is given (float or int, for example), a buffer will be created at that distance in the unit of the study_area_crs.
return_original : bool, default=False: Whether to return the (reprojected) study area as well as the (reprojected) buffered study area. Study area is only used for clipping data, so usually return_original=False is sufficient.
log : bool, default = False: Whether to log results to log file, by default False
verbose : bool, default=False: Whether to print status and results to terminal

Returns

studyAreaIN : geopandas dataframe: Geopandas dataframe with polygon geometry.

Expand source code

def read_study_area(study_area=None, output_crs='EPSG:4269', buffer=None, return_original=False, log=False, verbose=False, **read_file_kwargs):
    """Read study area geospatial file into geopandas

    Parameters
    ----------
    study_area : str, pathlib.Path, geopandas.GeoDataFrame, or shapely.Geometry
        Filepath to any geospatial file readable by geopandas. 
        Polygon is best, but may work with other types if extent is correct.
    study_area_crs : str, tuple, dict, optional
        CRS designation readable by geopandas/pyproj
    buffer : None or numeric, default=None
        If None, no buffer created. If a numeric value is given (float or int, for example), a buffer will be created at that distance in the unit of the study_area_crs.
    return_original : bool, default=False
        Whether to return the (reprojected) study area as well as the (reprojected) buffered study area. Study area is only used for clipping data, so usually return_original=False is sufficient.
    log : bool, default = False
        Whether to log results to log file, by default False
    verbose : bool, default=False
        Whether to print status and results to terminal

    Returns
    -------
    studyAreaIN : geopandas dataframe
        Geopandas dataframe with polygon geometry.
    """
    
    if study_area is None:
        study_area = w4h.get_resources()['study_area']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_study_area, locals())
    
    if isinstance(study_area, (gpd.GeoDataFrame, gpd.GeoSeries)):
        studyAreaIN = study_area
    elif isinstance(study_area, shapely.Geometry):
        if 'crs' in read_file_kwargs:
            crs = read_file_kwargs['crs']
        else:
            warnings.warn('A shapely Geometry object was read in as the study area, but no crs specified. Using CRS specified in output_crs: {output_crs}.\
                          You can specify the crs as a keyword argument in the read_study_area() function')
            crs = output_crs
        studyAreaIN = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[study_area])
    else:
        studyAreaIN = gpd.read_file(study_area, **read_file_kwargs)
    studyAreaIN.to_crs(output_crs, inplace=True)

    # Create a buffered study area for clipping
    if buffer is not None:
        if return_original:
            studyAreaNoBuffer = studyAreaIN
        studyAreaIN = studyAreaIN.buffer(distance=buffer)
        
        if verbose:
            print('\tBuffer applied.')
    
    if verbose:
        print("\tStudy area read.")

    if return_original:
        return studyAreaIN, studyAreaNoBuffer
    return studyAreaIN

def read_wcs(study_area, wcs_url='https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS', res_x=30, res_y=30, verbose=False, log=False, **kwargs)

Reads a WebCoverageService from a url and returns a rioxarray dataset containing it.

Parameters

study_area : geopandas.GeoDataFrame: Dataframe containing study area polygon
wcs_url : str, default=lidarURL
Represents the url for the WCS
res_x : int, default=30: Sets resolution for x axis
res_y : int, default=30: Sets resolution for y axis
log : bool, default = False: Whether to log results to log file, by default False
**kwargs

Returns

wcsData_rxr : xarray.DataArray: A xarray dataarray holding the image from the WebCoverageService

Expand source code

def read_wcs(study_area, wcs_url=lidarURL, res_x=30, res_y=30, verbose=False, log=False, **kwargs):
    """Reads a WebCoverageService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containing study area polygon
    wcs_url : str, default=lidarURL
    Represents the url for the WCS
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=30
        Sets resolution for y axis
    log : bool, default = False
        Whether to log results to log file, by default False
    **kwargs

    Returns
    -------
    wcsData_rxr : xarray.DataArray
        A xarray dataarray holding the image from the WebCoverageService
    """
    #Drawn largely from: https://git.wur.nl/isric/soilgrids/soilgrids.notebooks/-/blob/master/01-WCS-basics.ipynb
    
    #30m DEM
    #wcs_url = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_DEM_30M/ImageServer/WCSServer?request=GetCapabilities&service=WCS'
    #lidar url:
    #lidarURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS'

    #studyAreaPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ESL_StudyArea_5mi.shp"
    #study_area = gpd.read_file(studyAreaPath)
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wcs (currently set to {})'.format(study_area))
        return

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(read_wcs, locals(), exclude_params=['study_area'])      

    if 'wcs_url' in kwargs:
        wcs_url = kwargs['wcs_url']
    if 'res_x' in kwargs:
        res_x = kwargs['res_x']
    if 'res_y' in kwargs:
        res_y = kwargs['res_y']
    
    width_in = ''
    height_in= ''

    #Create coverage object
    my_wcs = WebCoverageService(wcs_url, version='1.0.0') 
    #names = [k for k in my_wcs.contents.keys()]
    #print(names)
    dataID = 'IL_Statewide_Lidar_DEM'
    data = my_wcs.contents[dataID]
    dBBox = data.boundingboxes #Is this an error?
    
    study_area = study_area.to_crs(data.boundingboxes[0]['nativeSrs'])
    saBBox = study_area.total_bounds
    
    #In case study area bounding box goes outside data bounding box, use data bounding box values
    newBBox = []
    for i,c in enumerate(dBBox[0]['bbox']):
        if i == 0 or i==2:
            if saBBox[i] < c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)
        else:
            if saBBox[i] > c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)

    #Recalculate resolution if it is too fine to read in
    #Start by getting the area of the study area bounding box
    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]
    saBBoxAreaM = saWidth*saHeight
    saBBoxAreaKM = saBBoxAreaM/(1000*1000) #Area in km^2

    if saBBoxAreaM/(res_x*res_y) > (4100*15000)*0.457194: #What I think might be the max download size?
        print("Resolution inputs overriden, file request too large.")
        res_x=str(round(saWidth/2500, 2))

        width_in  = str(int(saWidth/float(res_x )))
        height_in = str(int(saHeight/float(res_x)))
        
        res_y=str(round(saHeight/height_in, 2))

        print('New resolution is: '+res_x+'m_x X '+res_y+'m_y' )
        print('Dataset size: '+width_in+' pixels_x X '+height_in+' pixels_y')

    bBox = tuple(newBBox)
    bBox_str = str(tuple(newBBox)[1:-1]).replace(' ','')
    dataCRS = 'EPSG:3857'

    #Format WCS request using owslib
    response = my_wcs.getCoverage(
        identifier=my_wcs[dataID].id, 
        crs=dataCRS,#'urn:ogc:def:crs:EPSG::26716',
        bbox=bBox,
        resx=res_x, 
        resy=res_y,
        timeout=60,
        #width = width_in, height=height_in,
        format='GeoTIFF')
    response

    #If I can figure out url, this might be better?
    #baseURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/'+dataID+'/ImageServer/WCSServer'
    #addonRequestURL = '?request=GetCoverage&service=WCS&bbox='+bBox_str+'&srs='+dataCRS+'&format=GeoTIFF'+'&WIDTH='+width_in+'&HEIGHT='+height_in+')'
    #reqURL = baseURL+addonRequestURL
    #wcsData_rxr =  rxr.open_rasterio(reqURL)

    with MemoryFile(response) as memfile:
        with memfile.open() as dataset:
            wcsData_rxr =  rxr.open_rasterio(dataset)

    return wcsData_rxr

def read_wms(study_area, layer_name='IL_Statewide_Lidar_DEM_WGS:None', wms_url='https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS', srs='EPSG:3857', clip_to_studyarea=True, bbox=[-9889002.6155, 5134541.069716, -9737541.607038, 5239029.6274], res_x=30, res_y=30, size_x=512, size_y=512, format='image/tiff', verbose=False, log=False, **kwargs)

Reads a WebMapService from a url and returns a rioxarray dataset containing it.

Parameters

study_area : geopandas.GeoDataFrame: Dataframe containg study area polygon
layer_name : str, default='IL_Statewide_Lidar_DEM_WGS:None': Represents the layer name in the WMS
wms_url : str, default=lidarURL: Represents the url for the WMS
srs : str, default='EPSG:3857': Sets the srs
clip_to_studyarea : bool, default=True: Whether to clip to study area or not
res_x : int, default=30: Sets resolution for x axis
res_y : int, default=512: Sets resolution for y axis
size_x : int, default=512: Sets width of result
size_y : int, default=512: Sets height of result
log : bool, default = False: Whether to log results to log file, by default False

Returns

wmsData_rxr : xarray.DataArray: Holds the image from the WebMapService

Expand source code

def read_wms(study_area, layer_name='IL_Statewide_Lidar_DEM_WGS:None', wms_url=lidarURL, srs='EPSG:3857', 
             clip_to_studyarea=True, bbox=[-9889002.615500,5134541.069716,-9737541.607038,5239029.627400],
             res_x=30, res_y=30, size_x=512, size_y=512, 
             format='image/tiff', verbose=False, log=False, **kwargs):
    """
    Reads a WebMapService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containg study area polygon
    layer_name : str, default='IL_Statewide_Lidar_DEM_WGS:None'
        Represents the layer name in the WMS
    wms_url : str, default=lidarURL
        Represents the url for the WMS
    srs : str, default='EPSG:3857'
        Sets the srs
    clip_to_studyarea : bool, default=True
        Whether to clip to study area or not
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=512
        Sets resolution for y axis
    size_x : int, default=512
        Sets width of result
    size_y : int, default=512
        Sets height of result
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    wmsData_rxr : xarray.DataArray
        Holds the image from the WebMapService
    """
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wms (currently set to {})'.format(study_area))
        return
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_wms, locals(), exclude_params=['study_area'])
    from owslib.wms import WebMapService
    # Define WMS endpoint URL
    if 'wms_url' in kwargs:
        wms_url = kwargs['wms_url']
    else:
        wms_url = wms_url

    # Create WMS connection object
    wms = WebMapService(wms_url)
    # Print available layers
    #print(wms.contents)

    # Select desired layer
    if 'layer_name' in kwargs:
        layer = kwargs['layer_name']
    else:
        layer = layer_name
    
    data = wms.contents#[layer]
    if 'srs' in kwargs:
        studyArea_proj = study_area.to_crs(kwargs['srs'])
        saBBox = studyArea_proj.total_bounds
    else:
        studyArea_proj = study_area.to_crs(srs)
    
    saBBox = studyArea_proj.total_bounds

    if layer == 'IL_Statewide_Lidar_DEM_WGS:None':
        dBBox = data['0'].boundingBox #Is this an error?

        gpdDict = {'Label': ['Surf Data Box'], 'geometry': [shapely.geometry.Polygon(((dBBox[0], dBBox[1]), (dBBox[0], dBBox[3]), (dBBox[2], dBBox[3]), (dBBox[2], dBBox[1]), (dBBox[0], dBBox[1])))]}
        dBBoxGDF = gpd.GeoDataFrame(gpdDict, crs=dBBox[4])
        dBBoxGDF.to_crs(srs)

        #In case study area bounding box goes outside data bounding box, use data bounding box values
        newBBox = []
        for i,c in enumerate(dBBox):
            if type(c) is str:
                pass
            elif i == 0 or i==2:
                if saBBox[i] < c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)
            else:
                if saBBox[i] > c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)

    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]    
    # Check kwargs for rest of parameters
    if 'size_x' in kwargs:
        size_x = kwargs['size_x']
    if 'size_y' in kwargs:
        size_y = kwargs['size_y']
    if 'format' in kwargs:
        format = kwargs['format']
    if 'clip_to_studyarea' in kwargs:
        clip_to_studyarea = kwargs['clip_to_studyarea']
   
    #get the wms
    if clip_to_studyarea:
        img = wms.getmap(layers=[layer], srs=srs, bbox=saBBox, size=(size_x, size_y), format=format, transparent=True, timeout=60)        
    else:
        img = wms.getmap(layers=[layer], srs=srs, bbox=bbox, size=(size_x, size_y), format=format, transparent=True, timeout=60)

    #Save wms in memory to a raster dataset
    with MemoryFile(img) as memfile:
        with memfile.open() as dataset:
            wmsData_rxr = rxr.open_rasterio(dataset)

    #if clip_to_studyarea:
    #    wmsData_rxr = wmsData_rxr.sel(x=slice(saBBox[0], saBBox[2]), y=slice(saBBox[3], saBBox[1]))#.sel(band=1)

    return wmsData_rxr

def sample_raster_points(raster=None, points_df=None, well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False)

Sample raster values to points from geopandas geodataframe.

Parameters

raster : rioxarray data array: Raster containing values to be sampled.
points_df : geopandas.geodataframe: Geopandas dataframe with geometry column containing point values to sample.
well_id_col : str, default="API_NUMBER": Column that uniquely identifies each well so multiple sampling points are not taken per well
xcol : str, default='LONGITUDE': Column containing name for x-column, by default 'LONGITUDE.' This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
ycol : str, default='LATITUDE': Column containing name for y-column, by default 'LATITUDE.' This is used to output (potentially) reprojected point coordinates so as not to overwrite the original. new_col : str, optional
new_col : str, default='SAMPLED': Name for name of new column containing points sampled from the raster, by default 'SAMPLED'.
verbose : bool, default=True: Whether to send to print() information about progress of function, by default True.
log : bool, default = False: Whether to log results to log file, by default False

Returns

points_df : geopandas.geodataframe: Same as points_df, but with sampled values and potentially with reprojected coordinates.

Expand source code

def sample_raster_points(raster=None, points_df=None, well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False):  
    """Sample raster values to points from geopandas geodataframe.

    Parameters
    ----------
    raster : rioxarray data array
        Raster containing values to be sampled.
    points_df : geopandas.geodataframe
        Geopandas dataframe with geometry column containing point values to sample.
    well_id_col : str, default="API_NUMBER"
        Column that uniquely identifies each well so multiple sampling points are not taken per well
    xcol : str, default='LONGITUDE'
        Column containing name for x-column, by default 'LONGITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
    ycol : str, default='LATITUDE'
        Column containing name for y-column, by default 'LATITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.    new_col : str, optional
    new_col : str, default='SAMPLED'
        Name for name of new column containing points sampled from the raster, by default 'SAMPLED'.
    verbose : bool, default=True
        Whether to send to print() information about progress of function, by default True.
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    points_df : geopandas.geodataframe
        Same as points_df, but with sampled values and potentially with reprojected coordinates.
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if raster is None:
        raster = w4h.get_resources()['surf_elev']
    if points_df is None:
        points_df = w4h.get_resources()['well_data']

    if verbose:
        verbose_print(sample_raster_points, locals(), exclude_params=['raster', 'points_df'])
        print(f"\tSampling {new_col} grid for at all well locations.")

    #Project points to raster CRS
    rastercrsWKT=raster.spatial_ref.crs_wkt
    if rastercrsWKT != points_df.crs:
        if verbose:
            print("\tTemporarily reprojecting raster data to point data's CRS.")
        pointCRS = points_df.crs.to_epsg()
        if pointCRS is None:
            pointCRS = points_df.crs.to_wkt()

        raster = raster.rio.reproject(pointCRS)
        #points_df = points_df.to_crs(rastercrsWKT)

    xCOLOUT = xcol+'_PROJ'
    yCOLOUT = ycol+'_PROJ'
    points_df[xCOLOUT] = points_df['geometry'].x
    points_df[yCOLOUT] = points_df['geometry'].y
    xData = np.array(points_df[xCOLOUT].values)
    yData = np.array(points_df[yCOLOUT].values)
    zData = []
    zID = []
    zInd = []

    # Get unique well values to reduce sampling time
    uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
    if verbose:
        print(f"\t{uniqueWells.shape[0]} unique wells idenfied using {well_id_col} column")
    
    # Loop over DataFrame rows
    for i, row in uniqueWells.iterrows():
        # Select data from DataArray at current coordinates and append to list
        zInd.append(i)
        zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method='nearest').item()])
    
    inputtype = points_df.dtypes[well_id_col]
    wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

    # Merge each unique well's data with all well intervals
    wellZDF[well_id_col].astype(inputtype, copy=False)
    points_df = points_df.merge(wellZDF, how='left', on=well_id_col)
    #points_df[new_col] = zData#sampleDF[new_col]
    return points_df

def xyz_metadata_merge(xyz, metadata, verbose=False, log=False)

Add elevation to header data file.

Parameters

xyz : pandas.Dataframe: Contains elevation for the points
metadata : pandas dataframe: Header data file
log : bool, default = False: Whether to log results to log file, by default False

Returns

headerXYZData : pandas.Dataframe: Header dataset merged to get elevation values

Expand source code

def xyz_metadata_merge(xyz, metadata, verbose=False, log=False):
    """Add elevation to header data file.

    Parameters
    ----------
    xyz : pandas.Dataframe
        Contains elevation for the points
    metadata : pandas dataframe
        Header data file
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    headerXYZData : pandas.Dataframe
        Header dataset merged to get elevation values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(xyz_metadata_merge, locals(), exclude_params=['xyz'])
    headerXYZData = metadata.merge(xyz, how='left', on='API_NUMBER')
    headerXYZData.drop(['LATITUDE_x', 'LONGITUDE_x'], axis=1, inplace=True)
    headerXYZData.rename({'LATITUDE_y':'LATITUDE', 'LONGITUDE_y':'LONGITUDE'}, axis=1, inplace=True)
    return headerXYZData