Module w4h.export

The Export module contains functions for exporting processed data.

Expand source code
"""The Export module contains functions for exporting processed data.
"""

import datetime
import inspect
import pathlib

import pandas as pd
import xarray as xr

from w4h import logger_function, verbose_print

#Export data
def export_dataframe(df, out_dir, filename, date_stamp=True, log=False):
    """Function to export dataframes

    Parameters
    ----------
    df : pandas dataframe, or list of pandas dataframes
        Data frame or list of dataframes to be exported
    out_dir : string or pathlib.Path object
        Directory to which to export dataframe object(s) as .csv
    filename : str or list of strings
        Filename(s) of output files
    date_stamp : bool, default=True
        Whether to include a datestamp in the filename. If true, file ends with _yyyy-mm-dd.csv of current date, by default True.
    log : bool, default = True
        Whether to log inputs and outputs to log file.        
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if date_stamp:
        nowTime = datetime.datetime.now()
        nowTime = str(nowTime).replace(':', '-').replace(' ','_').split('.')[0]
        nowTimeStr = '_'+str(nowTime)
    else:
        nowTimeStr=''

    if type(out_dir) is str or isinstance(out_dir, pathlib.PurePath):
        out_dir = str(out_dir)
        out_dir = out_dir.replace('\\', '/').replace('\\'[-1], '/')
        if out_dir[-1] != '/':
            out_dir = out_dir + '/'
    else:
        print('Please input string or pathlib object for out_dir parameters')
        return

    if type(filename) is str:
        dfOutFile =  out_dir+filename+nowTimeStr+'.csv'
        df.to_csv(dfOutFile, index_label='ID')
        print('Exported '+filename+nowTimeStr+'.csv')
    elif type(filename) is list and type(df) is list and len(df) == len(filename):
        for i, f in enumerate(df):
            fname = filename[i]
            dfOutFile =  out_dir+fname+nowTimeStr+'.csv'
            f.to_csv(dfOutFile, index_label='ID')
            print('Exported '+fname+nowTimeStr+'.csv')

#Export (rio)xarray dataarrays and datasets
def export_grids(grid_data, out_path, file_id='',filetype='tif', variable_sep=True, date_stamp=True, verbose=False, log=False):
    """Function to export grids to files.

    Parameters
    ----------
    grid_data : xarray DataArray or xarray Dataset
        Dataset or dataarray to be exported
    out_path : str or pathlib.Path object
        Output location for data export. If variable_sep=True, this should be a directory. Otherwise, this should also include the filename. The file extension should not be included here.
    file_id : str, optional
        If specified, will add this after 'LayerXX' or 'AllLayers' in the filename, just before datestamp, if used. Example filename for file_id='Coarse': Layer1_Coarse_2023-04-18.tif.
    filetype : str, optional
        Output filetype. Can either be pickle or any file extension supported by rioxarray.rio.to_raster(). Can either include period or not., by default 'tif'
    variable_sep : bool, optional
        If grid_data is an xarray Dataset, this will export each variable in the dataset as a separate file, including the variable name in the filename, by default False
    date_stamp : bool, optional
        Whether to include a date stamp in the file name., by default True
    log : bool, default = True
        Whether to log inputs and outputs to log file.        
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(export_grids, locals(), exclude_params=['grid_data'])
    #Initialize lists to determine which filetype will be used for export
    ncdfList = ['netcdf', 'ncdf', 'n']
    tifList = ['tif', 'tiff', 'geotiff', 'geotif', 't']
    pickleList = ['pickle', 'pkl', 'p']

    filenames = []

    #Format output string(s)
    #Format output filepath
    if isinstance(out_path, (pathlib.PurePath, str)):
        if isinstance(out_path, pathlib.PurePath):
            pass
        else:
            out_path = pathlib.Path(out_path)
            
        if out_path.parent.exists() == False:
            print('Directory does not exist. Please enter a different value for the out_path parameter.')
            return        

        if out_path.is_dir():
            if isinstance(grid_data, xr.DataArray):
                if variable_sep:
                    lyrs = grid_data.coords['Layer'].values
                    filenames = []
                    for l in lyrs:
                        filenames.append('Layer'+str(l))
                else:
                    filenames = ['AllLayers']
            if isinstance(grid_data, xr.Dataset):
                if variable_sep:
                    filenames = []
                    for var in grid_data:
                        filenames.append(var)
                else:
                    filenames = ['AllLayers']    
        else:
            filenames = [out_path.stem]
            out_path = out_path.parent
    else:
        print('No output path specified (out_path). Please input string or pathlib object for out_path parameters')
        return
    
    #Format datestamp, if desired in output filename
    if date_stamp:
        nowTime = datetime.datetime.now()
        nowTime = str(nowTime).replace(':', '-').replace(' ','_').split('.')[0]
        nowTimeStr = '_'+str(nowTime)
    else:
        nowTimeStr=''

    #Ensure the file suffix includes .
    if filetype[0] == '.':
        pass
    else:
        filetype = '.' + filetype

    if file_id != '':
        file_id = '_'+file_id

    out_path = out_path.as_posix()+'/'
    
    if verbose:
        print('Export filepath(s):')
        
    outPaths = []
    for f in filenames:
        currOutPath = out_path+f+file_id+nowTimeStr+filetype
        outPaths.append(currOutPath)
        if verbose:
            print('\t {}'.format(currOutPath))
        
    #Do export
    if filetype.lower() in pickleList:
        import pickle
        for op in outPaths:
            try:
                with open(op, 'wb') as f:
                    pickle.dump(grid_data, f)
            except:
                print('An error occured during export.')
                print(op, 'could not be exported as a pickle object.')
                print('Try again using different parameters.')
    else:
        import rioxarray as rxr
        try:
            if isinstance(grid_data, xr.Dataset):
                if variable_sep:
                    for i, var in enumerate(grid_data.data_vars):
                        grid_data[var].rio.to_raster(outPaths[i])
                else:
                    grid_data.rio.to_raster(outPaths[0])
            elif isinstance(grid_data, xr.DataArray):
                if variable_sep:
                    lyrs = grid_data.coords['Layer'].values
                    for i, l in enumerate(lyrs):
                        out_grid = grid_data.sel(Layer = l).copy()
                        out_grid.rio.to_raster(outPaths[i])
                else:
                    grid_data.rio.to_raster(outPaths[0])
            else:
                grid_data.rio.to_raster(outPaths[0])
        except:
            print('An error occured during export.')
            print('{} could not be exported as {} file.'.format(outPaths, filetype))
            print('Try again using different parameters.')

    return

Functions

def export_dataframe(df, out_dir, filename, date_stamp=True, log=False)

Function to export dataframes

Parameters

df : pandas dataframe, or list of pandas dataframes
Data frame or list of dataframes to be exported
out_dir : string or pathlib.Path object
Directory to which to export dataframe object(s) as .csv
filename : str or list of strings
Filename(s) of output files
date_stamp : bool, default=True
Whether to include a datestamp in the filename. If true, file ends with _yyyy-mm-dd.csv of current date, by default True.
log : bool, default = True
Whether to log inputs and outputs to log file.
Expand source code
def export_dataframe(df, out_dir, filename, date_stamp=True, log=False):
    """Function to export dataframes

    Parameters
    ----------
    df : pandas dataframe, or list of pandas dataframes
        Data frame or list of dataframes to be exported
    out_dir : string or pathlib.Path object
        Directory to which to export dataframe object(s) as .csv
    filename : str or list of strings
        Filename(s) of output files
    date_stamp : bool, default=True
        Whether to include a datestamp in the filename. If true, file ends with _yyyy-mm-dd.csv of current date, by default True.
    log : bool, default = True
        Whether to log inputs and outputs to log file.        
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if date_stamp:
        nowTime = datetime.datetime.now()
        nowTime = str(nowTime).replace(':', '-').replace(' ','_').split('.')[0]
        nowTimeStr = '_'+str(nowTime)
    else:
        nowTimeStr=''

    if type(out_dir) is str or isinstance(out_dir, pathlib.PurePath):
        out_dir = str(out_dir)
        out_dir = out_dir.replace('\\', '/').replace('\\'[-1], '/')
        if out_dir[-1] != '/':
            out_dir = out_dir + '/'
    else:
        print('Please input string or pathlib object for out_dir parameters')
        return

    if type(filename) is str:
        dfOutFile =  out_dir+filename+nowTimeStr+'.csv'
        df.to_csv(dfOutFile, index_label='ID')
        print('Exported '+filename+nowTimeStr+'.csv')
    elif type(filename) is list and type(df) is list and len(df) == len(filename):
        for i, f in enumerate(df):
            fname = filename[i]
            dfOutFile =  out_dir+fname+nowTimeStr+'.csv'
            f.to_csv(dfOutFile, index_label='ID')
            print('Exported '+fname+nowTimeStr+'.csv')
def export_grids(grid_data, out_path, file_id='', filetype='tif', variable_sep=True, date_stamp=True, verbose=False, log=False)

Function to export grids to files.

Parameters

grid_data : xarray DataArray or xarray Dataset
Dataset or dataarray to be exported
out_path : str or pathlib.Path object
Output location for data export. If variable_sep=True, this should be a directory. Otherwise, this should also include the filename. The file extension should not be included here.
file_id : str, optional
If specified, will add this after 'LayerXX' or 'AllLayers' in the filename, just before datestamp, if used. Example filename for file_id='Coarse': Layer1_Coarse_2023-04-18.tif.
filetype : str, optional
Output filetype. Can either be pickle or any file extension supported by rioxarray.rio.to_raster(). Can either include period or not., by default 'tif'
variable_sep : bool, optional
If grid_data is an xarray Dataset, this will export each variable in the dataset as a separate file, including the variable name in the filename, by default False
date_stamp : bool, optional
Whether to include a date stamp in the file name., by default True
log : bool, default = True
Whether to log inputs and outputs to log file.
Expand source code
def export_grids(grid_data, out_path, file_id='',filetype='tif', variable_sep=True, date_stamp=True, verbose=False, log=False):
    """Function to export grids to files.

    Parameters
    ----------
    grid_data : xarray DataArray or xarray Dataset
        Dataset or dataarray to be exported
    out_path : str or pathlib.Path object
        Output location for data export. If variable_sep=True, this should be a directory. Otherwise, this should also include the filename. The file extension should not be included here.
    file_id : str, optional
        If specified, will add this after 'LayerXX' or 'AllLayers' in the filename, just before datestamp, if used. Example filename for file_id='Coarse': Layer1_Coarse_2023-04-18.tif.
    filetype : str, optional
        Output filetype. Can either be pickle or any file extension supported by rioxarray.rio.to_raster(). Can either include period or not., by default 'tif'
    variable_sep : bool, optional
        If grid_data is an xarray Dataset, this will export each variable in the dataset as a separate file, including the variable name in the filename, by default False
    date_stamp : bool, optional
        Whether to include a date stamp in the file name., by default True
    log : bool, default = True
        Whether to log inputs and outputs to log file.        
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(export_grids, locals(), exclude_params=['grid_data'])
    #Initialize lists to determine which filetype will be used for export
    ncdfList = ['netcdf', 'ncdf', 'n']
    tifList = ['tif', 'tiff', 'geotiff', 'geotif', 't']
    pickleList = ['pickle', 'pkl', 'p']

    filenames = []

    #Format output string(s)
    #Format output filepath
    if isinstance(out_path, (pathlib.PurePath, str)):
        if isinstance(out_path, pathlib.PurePath):
            pass
        else:
            out_path = pathlib.Path(out_path)
            
        if out_path.parent.exists() == False:
            print('Directory does not exist. Please enter a different value for the out_path parameter.')
            return        

        if out_path.is_dir():
            if isinstance(grid_data, xr.DataArray):
                if variable_sep:
                    lyrs = grid_data.coords['Layer'].values
                    filenames = []
                    for l in lyrs:
                        filenames.append('Layer'+str(l))
                else:
                    filenames = ['AllLayers']
            if isinstance(grid_data, xr.Dataset):
                if variable_sep:
                    filenames = []
                    for var in grid_data:
                        filenames.append(var)
                else:
                    filenames = ['AllLayers']    
        else:
            filenames = [out_path.stem]
            out_path = out_path.parent
    else:
        print('No output path specified (out_path). Please input string or pathlib object for out_path parameters')
        return
    
    #Format datestamp, if desired in output filename
    if date_stamp:
        nowTime = datetime.datetime.now()
        nowTime = str(nowTime).replace(':', '-').replace(' ','_').split('.')[0]
        nowTimeStr = '_'+str(nowTime)
    else:
        nowTimeStr=''

    #Ensure the file suffix includes .
    if filetype[0] == '.':
        pass
    else:
        filetype = '.' + filetype

    if file_id != '':
        file_id = '_'+file_id

    out_path = out_path.as_posix()+'/'
    
    if verbose:
        print('Export filepath(s):')
        
    outPaths = []
    for f in filenames:
        currOutPath = out_path+f+file_id+nowTimeStr+filetype
        outPaths.append(currOutPath)
        if verbose:
            print('\t {}'.format(currOutPath))
        
    #Do export
    if filetype.lower() in pickleList:
        import pickle
        for op in outPaths:
            try:
                with open(op, 'wb') as f:
                    pickle.dump(grid_data, f)
            except:
                print('An error occured during export.')
                print(op, 'could not be exported as a pickle object.')
                print('Try again using different parameters.')
    else:
        import rioxarray as rxr
        try:
            if isinstance(grid_data, xr.Dataset):
                if variable_sep:
                    for i, var in enumerate(grid_data.data_vars):
                        grid_data[var].rio.to_raster(outPaths[i])
                else:
                    grid_data.rio.to_raster(outPaths[0])
            elif isinstance(grid_data, xr.DataArray):
                if variable_sep:
                    lyrs = grid_data.coords['Layer'].values
                    for i, l in enumerate(lyrs):
                        out_grid = grid_data.sel(Layer = l).copy()
                        out_grid.rio.to_raster(outPaths[i])
                else:
                    grid_data.rio.to_raster(outPaths[0])
            else:
                grid_data.rio.to_raster(outPaths[0])
        except:
            print('An error occured during export.')
            print('{} could not be exported as {} file.'.format(outPaths, filetype))
            print('Try again using different parameters.')

    return