Module sprit.sprit_hvsr
This module is the main SpRIT module that contains all the functions needed to run HVSR analysis.
The functions defined here are read both by the SpRIT graphical user interface and by the command-line interface to run HVSR analysis on input data.
See documentation for individual functions for more information.
Functions
def batch_data_read(batch_data,
batch_type='table',
param_col=None,
batch_params=None,
verbose=False,
**readcsv_getMeta_fetch_kwargs)-
Expand source code
def batch_data_read(batch_data, batch_type='table', param_col=None, batch_params=None, verbose=False, **readcsv_getMeta_fetch_kwargs): """Function to read data in data as a batch of multiple data files. This is best used through sprit.fetch_data(*args, source='batch', **other_kwargs). Parameters ---------- batch_data : filepath or list Input data information for how to read in data as batch. Can be filepath or list of filepaths/stream objects. If filepath, should point to .csv (or similar that can be read by pandas.read_csv()) with batch data information. batch_type : str, optional Type of batch read, only 'table' and 'filelist' accepted. If 'table', will read data from a file read in using pandas.read_csv(), by default 'table' param_col : None or str, optional Name of parameter column from batch information file. Only used if a batch_type='table' and single parameter column is used, rather than one column per parameter (for single parameter column, parameters are formatted with = between keys/values and , between item pairs), by default None batch_params : list, dict, or None, default = None Parameters to be used if batch_type='filelist'. If it is a list, needs to be the same length as batch_data. If it is a dict, will be applied to all files in batch_data and will combined with extra keyword arguments caught by **readcsv_getMeta_fetch_kwargs. verbose : bool, optional Whether to print information to terminal during batch read, by default False **readcsv_getMeta_fetch_kwargs Keyword arguments that will be read into pandas.read_csv(), sprit.input_params, sprit.get_metadata(), and/or sprit.fetch_data() Returns ------- hvsrBatch HVSRBatch object with each item representing a different HVSRData object Raises ------ IndexError _description_ """ if verbose: print(f'Processing batch data from {batch_type}:') print(f" Batch data source: {batch_data}") # First figure out which parameters go with which function input_params_params = inspect.signature(input_params).parameters get_metadata_params = inspect.signature(get_metadata).parameters fetch_data_params = inspect.signature(fetch_data).parameters calculate_azimuth_params = inspect.signature(calculate_azimuth).parameters remove_noise_params = inspect.signature(remove_noise).parameters generate_ppsds_params = inspect.signature(generate_psds).parameters remove_outlier_curves_params = inspect.signature(remove_outlier_curves).parameters process_hvsr_params = inspect.signature(process_hvsr).parameters check_peaks_params = inspect.signature(check_peaks).parameters get_report_params = inspect.signature(get_report).parameters dict_of_params = {'input_params': input_params_params, 'get_metadata': get_metadata_params, 'fetch_data_params': fetch_data_params, 'calculate_azimuth_params': calculate_azimuth_params, 'remove_noise_params': remove_noise_params, 'generate_ppsds_params': generate_ppsds_params, 'remove_outlier_curves_params': remove_outlier_curves_params, 'process_hvsr_params': process_hvsr_params, 'check_peaks_params': check_peaks_params, 'get_report_params': get_report_params} def __get_run_functions(): # Get a list of all functions (for which paramters are used) in sprit.run() run_functions_list = [input_params, fetch_data, batch_data_read, get_metadata, calculate_azimuth, remove_noise, generate_psds, remove_outlier_curves, process_hvsr, check_peaks, get_report, export_hvsr] return run_functions_list SPRIT_RUN_FUNCTIONS = __get_run_functions() # Get default values of all functions in a dict default_dict = {} for i, fun in enumerate(SPRIT_RUN_FUNCTIONS): for param_name, param_info in inspect.signature(fun).parameters.items(): if param_info.default is not inspect._empty: default_dict[param_name] = param_info.default if batch_type == 'sample' or batch_data in sampleFileKeyMap.keys(): sample_data = True batch_type = 'table' else: sample_data = False # Dictionary to store the stream objects stream_dict = {} data_dict = {} if batch_type == 'table': # If this is sample data, we need to create absolute paths to the filepaths if sample_data: dataReadInfoDF = pd.read_csv(sampleFileKeyMap['sample_batch']) for index, row in dataReadInfoDF.iterrows(): dataReadInfoDF.loc[index, 'input_data'] = SAMPLE_DATA_DIR.joinpath(row.loc['input_data']) elif isinstance(batch_data, pd.DataFrame): dataReadInfoDF = batch_data elif isinstance(batch_data, dict): # For params input dataReadInfoDF = pd.DataFrame.from_dict(batch_data) pass else: # Read csv read_csv_kwargs = {k: v for k, v in locals()['readcsv_getMeta_fetch_kwargs'].items() if k in inspect.signature(pd.read_csv).parameters} dataReadInfoDF = pd.read_csv(batch_data, **read_csv_kwargs) if 'input_data' in dataReadInfoDF.columns: filelist = list(dataReadInfoDF['input_data']) # Generate site names if they don't exist already if 'site' not in dataReadInfoDF.columns: siterows = [] filldigs = len(str(dataReadInfoDF.shape[0])) # Number of digits in df shape for i, row in dataReadInfoDF.iterrows(): siterows.append(f'HVSRSite_{str(i).zfill(filldigs)}') dataReadInfoDF['site'] = siterows # Print information about batch read, as specified print(f" {dataReadInfoDF.shape[0]} sites found: {list(dataReadInfoDF['site'])}") if verbose: maxLength = 25 maxColWidth = 12 if dataReadInfoDF.shape[0] > maxLength: print(f'\t Showing information for first {maxLength} files only:') print() # Print nicely formatted df # Print column names print(' ', end='') for col in dataReadInfoDF.columns: print(str(col)[:maxColWidth].ljust(maxColWidth), end=' ') print('\n', end='') # Print separator tableLen = (maxColWidth+2)*len(dataReadInfoDF.columns) for r in range(tableLen): print('-', end='') print() #Print columns/rows for index, row in dataReadInfoDF.iterrows(): print(' ', end='') for col in row: if len(str(col)) > maxColWidth: print((str(col)[:maxColWidth-3]+'...').ljust(maxColWidth), end=' ') else: print(str(col)[:maxColWidth].ljust(maxColWidth), end=' ') print() if dataReadInfoDF.shape[0] > maxLength: endline = f'\t...{dataReadInfoDF.shape[0]-maxLength} more rows in file.\n' else: endline = '\n' print(endline) print('Fetching the following files:') # Get processing parameters, either from column param_col or from individual columns # If param_col, format is string of format: "param_name=param_val, param_name2=param_val2" param_dict_list = [] verboseStatement = [] if param_col is None: # Not a single parameter column, each col=parameter for row_ind in range(dataReadInfoDF.shape[0]): param_dict = {} verboseStatement.append([]) for col in dataReadInfoDF.columns: for fun in SPRIT_RUN_FUNCTIONS: if col in inspect.signature(fun).parameters: currParam = dataReadInfoDF.loc[row_ind, col] if pd.isna(currParam) or currParam == 'nan': if col in default_dict.keys(): param_dict[col] = default_dict[col] # Get default value if verbose: if type(default_dict[col]) is str: verboseStatement[row_ind].append("\t\t'{}' parameter not specified in batch file. Using {}='{}'".format(col, col, default_dict[col])) else: verboseStatement[row_ind].append("\t\t'{}' parameter not specified in batch file. Using {}={}".format(col, col, default_dict[col])) else: param_dict[col] = None else: param_dict[col] = dataReadInfoDF.loc[row_ind, col] param_dict_list.append(param_dict) else: if param_col not in dataReadInfoDF.columns: raise IndexError('{} is not a column in {} (columns are: {})'.format(param_col, batch_data, dataReadInfoDF.columns)) for row in dataReadInfoDF[param_col]: param_dict = {} splitRow = str(row).split(',') for item in splitRow: param_dict[item.split('=')[0]] = item.split('=')[1] param_dict_list.append(param_dict) elif batch_type == 'filelist': if not isinstance(batch_data, (list, tuple)): raise RuntimeError(f"If batch_type is specified as 'filelist' or 'list', batch_data must be list or tuple, not {type(batch_data)}.") # Update formatting of batch_params for rest of processing if batch_params is None: batch_params = [{}] * len(batch_data) # Get batch_parameters if isinstance(batch_params, list): if len(batch_params) != len(batch_data): raise RuntimeError('If batch_params is list, it must be the same length as batch_data. len(batch_params)={} != len(batch_data)={}'.format(len(batch_params), len(batch_data))) param_dict_list = batch_params elif isinstance(batch_params, dict): batch_params.update(readcsv_getMeta_fetch_kwargs) param_dict_list = [] for i in range(len(batch_data)): param_dict_list.append(batch_params) # Read and process each MiniSEED file for i, file in enumerate(batch_data): param_dict_list[i]['input_data'] = file # Get a uniformly formatted input DataFrame input_df_uniformatted = pd.DataFrame(param_dict_list) # Do batch fun of input_params() and fetch_data() (these are skipped in run() if batch mode is used) hvsr_batchDict = {} zfillDigs = len(str(len(param_dict_list))) # Get number of digits of length of param_dict_list i = 0 for i, param_dict in enumerate(param_dict_list): # Read the data file into a Stream object input_params_kwargs = {k: v for k, v in locals()['readcsv_getMeta_fetch_kwargs'].items() if k in inspect.signature(input_params).parameters} input_params_kwargs2 = {k: v for k, v in param_dict.items() if k in inspect.signature(input_params).parameters} input_params_kwargs.update(input_params_kwargs2) # Run input_params() try: ipverboseString = '\tinput_params: <No parameters specified>, ' for arg, value in input_params_kwargs.items(): ipverboseString = ipverboseString.replace('<No parameters specified>, ', '') ipverboseString += f"{arg}={value}, " ipverboseString = ipverboseString[:-2] ipverboseString = (ipverboseString[:96] + '...') if len(ipverboseString) > 99 else ipverboseString params = input_params(**input_params_kwargs) except Exception as e: params = input_params_kwargs params['processing_status'] = {} params['processing_status']['input_params_status'] = False params['processing_status']['overall_status'] = False verboseStatement.append(f"\t{e}") # Run fetch_data() fetch_data_kwargs = {k: v for k, v in locals()['readcsv_getMeta_fetch_kwargs'].items() if k in inspect.signature(fetch_data).parameters} fetch_data_kwargs2 = {k: v for k, v in param_dict.items() if k in inspect.signature(fetch_data).parameters} fetch_data_kwargs.update(fetch_data_kwargs2) try: fdverboseString = '\tfetch_data: <No parameters specified>, ' for arg, value in fetch_data_kwargs.items(): fdverboseString = fdverboseString.replace('<No parameters specified>, ', '') fdverboseString += f"{arg}={value}, " fdverboseString = fdverboseString[:-2] fdverboseString = (fdverboseString[:96] + '...') if len(fdverboseString) > 99 else fdverboseString hvsrData = fetch_data(params=params, **fetch_data_kwargs) except Exception as e: hvsrData = params hvsrData['processing_status']['fetch_data_status'] = False hvsrData['processing_status']['overall_status'] = False verboseStatement.append(f"\t{e}") if verbose and hvsrData['processing_status']['overall_status']: print(f" {hvsrData['site']}") print(ipverboseString) print(fdverboseString) if verboseStatement != []: for item in verboseStatement[i]: print(item) elif verbose and not hvsrData['processing_status']['overall_status']: if 'site' in param_dict.keys(): sitename = param_dict['site'] else: sitename = 'UNSPECIFIED_SITE' print(f" {sitename}") print(ipverboseString) print(fdverboseString) if verboseStatement != []: for item in verboseStatement[i]: print(item) print(f" *{sitename} not read correctly. Processing will not be carried out.") hvsrData['batch'] = True # This may be redundant if hvsrData['site'] == default_dict['site']: # If site was not designated hvsrData['site'] = "{}_{}".format(hvsrData['site'], str(i).zfill(zfillDigs)) i += 1 # Get processing parameters for other functions in sprit.run() besides input_params and fetch_data if 'processing_parameters' in hvsrData.keys(): processing_parameters = hvsrData['processing_parameters'].copy() else: processing_parameters = {} # "input_params": input_params_kwargs, "fetch_data": fetch_data_kwargs} for fun in SPRIT_RUN_FUNCTIONS: specified_params = {k: v for k, v in param_dict.items() if k in inspect.signature(fun).parameters} processing_parameters[fun.__name__] = specified_params # Assume source is 'file' if not specified hvsrData['processing_parameters'] = processing_parameters if 'source' not in hvsrData['processing_parameters']['fetch_data'].keys(): hvsrData['processing_parameters']['fetch_data']['source'] = 'file' hvsr_batchDict[hvsrData['site']] = hvsrData hvsrBatch = HVSRBatch(hvsr_batchDict, df_as_read=input_df_uniformatted) print() print('Finished reading input data in preparation for batch processing') return hvsrBatchFunction to read data in data as a batch of multiple data files. This is best used through sprit.fetch_data(args, source='batch', *other_kwargs).
Parameters
batch_data:filepathorlist- Input data information for how to read in data as batch. Can be filepath or list of filepaths/stream objects. If filepath, should point to .csv (or similar that can be read by pandas.read_csv()) with batch data information.
batch_type:str, optional- Type of batch read, only 'table' and 'filelist' accepted. If 'table', will read data from a file read in using pandas.read_csv(), by default 'table'
param_col:Noneorstr, optional- Name of parameter column from batch information file. Only used if a batch_type='table' and single parameter column is used, rather than one column per parameter (for single parameter column, parameters are formatted with = between keys/values and , between item pairs), by default None
batch_params:list, dict,orNone, default= None- Parameters to be used if batch_type='filelist'. If it is a list, needs to be the same length as batch_data. If it is a dict, will be applied to all files in batch_data and will combined with extra keyword arguments caught by **readcsv_getMeta_fetch_kwargs.
verbose:bool, optional- Whether to print information to terminal during batch read, by default False
**readcsv_getMeta_fetch_kwargs- Keyword arguments that will be read into pandas.read_csv(), sprit.input_params, sprit.get_metadata(), and/or sprit.fetch_data()
Returns
hvsrBatch- HVSRBatch object with each item representing a different HVSRData object
Raises
IndexError- description
def calculate_azimuth(hvsr_data,
azimuth_angle=45,
azimuth_type='multiple',
azimuth_unit='degrees',
show_az_plot=False,
verbose=False,
**plot_azimuth_kwargs)-
Expand source code
def calculate_azimuth(hvsr_data, azimuth_angle=45, azimuth_type='multiple', azimuth_unit='degrees', show_az_plot=False, verbose=False, **plot_azimuth_kwargs): """Function to calculate azimuthal horizontal component at specified angle(s). Adds each new horizontal component as a radial component to obspy.Stream object at hvsr_data['stream'] Parameters ---------- hvsr_data : HVSRData Input HVSR data azimuth_angle : int, default=10 If `azimuth_type='multiple'`, this is the angular step (in unit `azimuth_unit`) of each of the azimuthal measurements. If `azimuth_type='single'` this is the angle (in unit `azimuth_unit`) of the single calculated azimuthal measruement. By default 10. azimuth_type : str, default='multiple' What type of azimuthal measurement to make, by default 'multiple'. If 'multiple' (or {'multi', 'mult', 'm'}), will take a measurement at each angular step of azimuth_angle of unit azimuth_unit. If 'single' (or {'sing', 's'}), will take a single azimuthal measurement at angle specified in azimuth_angle. azimuth_unit : str, default='degrees' Angular unit used to specify `azimuth_angle` parameter. By default 'degrees'. If 'degrees' (or {'deg', 'd'}), will use degrees. If 'radians' (or {'rad', 'r'}), will use radians. show_az_plot : bool, default=False Whether to show azimuthal plot, by default False. verbose : bool, default=False Whether to print terminal output, by default False Returns ------- HVSRData Updated HVSRData object specified in hvsr_data with hvsr_data['stream'] attribute containing additional components (EHR-***), with *** being zero-padded (3 digits) azimuth angle in degrees. """ # Get intput paramaters orig_args = locals().copy() start_time = datetime.datetime.now() # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'calculate_azimuth' in hvsr_data['processing_parameters'].keys(): update_msg = [] for k, v in hvsr_data['processing_parameters']['calculate_azimuth'].items(): defaultVDict = dict(zip(inspect.getfullargspec(calculate_azimuth).args[1:], inspect.getfullargspec(calculate_azimuth).defaults)) # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v azimuth_angle = orig_args['azimuth_angle'] azimuth_unit = orig_args['azimuth_unit'] show_az_plot = orig_args['show_az_plot'] verbose = orig_args['verbose'] if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']): if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']: pass else: print('\nGenerating azimuthal data (calculate_azimuth())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key == 'hvsr_data': pass else: print('\t {}={}'.format(key, value)) if 'processing_parameters' in hvsr_data.keys() and 'calculate_azimuth' in hvsr_data['processing_parameters'].keys(): if update_msg != []: print() update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() if isinstance(hvsr_data, HVSRBatch): # If running batch, we'll loop through each site hvsr_out = {} for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_out[site_name] = __azimuth_batch(**args) #Call another function, that lets us run this function again except Exception as e: hvsr_out[site_name]['processing_status']['calculate_azimuths_status'] = False hvsr_out[site_name]['processing_status']['overall_status'] = False if verbose: print(e) else: hvsr_data[site_name]['processing_status']['calculate_azimuths_status'] = False hvsr_data[site_name]['processing_status']['overall_status'] = False hvsr_out = hvsr_data output = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df) return output elif isinstance(hvsr_data, (HVSRData, dict, obspy.Stream)): degList = ['degrees', 'deg', 'd', '°'] radList = ['radians', 'rad', 'r'] if azimuth_unit.lower() in degList: az_angle_rad = np.deg2rad(azimuth_angle) az_angle_deg = azimuth_angle elif azimuth_unit.lower() in radList: az_angle_rad = azimuth_angle az_angle_deg = np.rad2deg(azimuth_angle) else: warnings.warn(f"azimuth_unit={azimuth_unit} not supported. Try 'degrees' or 'radians'. No azimuthal analysis run.") return hvsr_data # Limit to 1-180 (and "right" half of compass) (will be reflected on other half if applicable to save computation time) conversion_message = '' will_convert = False if az_angle_deg < 0: will_convert = True conversion_message = conversion_message + 'converted to a positive value' if az_angle_deg < -180: conversion_message = conversion_message + ' between 0 and 180 degrees' if az_angle_deg > 180: will_convert = True conversion_message = conversion_message + ' converted to a value between 0 and 180 degrees' if will_convert: conversion_message = f"\tThe azimuth angle specified will be{conversion_message}" if verbose: print(conversion_message, end=f': {az_angle_deg}') # Convert angle to 0-180 az_angle_deg = az_angle_deg - (180 * (az_angle_deg // 180)) az_angle_rad = az_angle_rad = np.deg2rad(azimuth_angle) if verbose: print(f' degrees --> {az_angle_deg} degrees.') multAzList = ['multiple azimuths', 'multiple', 'multi', 'mult', 'm'] singleAzList = ['single azimuth', 'single', 'sing', 's'] if azimuth_type.lower() in multAzList: azimuth_list = list(np.arange(0, np.pi, az_angle_rad)) azimuth_list_deg = list(np.arange(0, 180, az_angle_deg)) elif azimuth_type.lower() in singleAzList: azimuth_list = [az_angle_rad] azimuth_list_deg = [az_angle_deg] else: warnings.warn(f"azimuth_type={azimuth_type} not supported. Try 'multiple' or 'single'. No azimuthal analysis run.") return hvsr_data if isinstance(hvsr_data, (HVSRData, dict)): zComp = hvsr_data['stream'].select(component='Z').merge() eComp = hvsr_data['stream'].select(component='E').merge() nComp = hvsr_data['stream'].select(component='N').merge() elif isinstance(hvsr_data, obspy.Stream): zComp = hvsr_data.select(component='Z').merge() eComp = hvsr_data.select(component='E').merge() nComp = hvsr_data.select(component='N').merge() # Reset stats for original data too zComp[0].stats['azimuth_deg'] = 0 eComp[0].stats['azimuth_deg'] = 90 nComp[0].stats['azimuth_deg'] = 0 zComp[0].stats['azimuth_rad'] = 0 eComp[0].stats['azimuth_rad'] = np.pi/2 nComp[0].stats['azimuth_rad'] = 0 zComp[0].stats['location'] = '000' eComp[0].stats['location'] = '090' nComp[0].stats['location'] = '000' statsDict = {} for key, value in eComp[0].stats.items(): statsDict[key] = value for i, az_rad in enumerate(azimuth_list): az_deg = azimuth_list_deg[i] statsDict['location'] = f"{str(round(az_deg,0)).zfill(3)}" #Change location name statsDict['channel'] = f"EHR"#-{str(round(az_deg,0)).zfill(3)}" #Change channel name statsDict['azimuth_deg'] = az_deg statsDict['azimuth_rad'] = az_rad hasMask = [False, False] if np.ma.is_masked(nComp[0].data): nData = nComp[0].data.data nMask = nComp[0].data.mask hasMask[0] = True else: nData = nComp[0].data nMask = [True] * len(nData) if np.ma.is_masked(eComp[0].data): eData = eComp[0].data.data eMask = eComp[0].data.mask hasMask[1] = True else: eData = eComp[0].data eMask = [True] * len(eData) # From hvsrpy: horizontal = self.ns._amp * math.cos(az_rad) + self.ew._amp*math.sin(az_rad) if True in hasMask: radial_comp_data = np.ma.array(np.add(nData * np.cos(az_rad), eData * np.sin(az_angle_rad)), mask=list(map(operator.and_, nMask, eMask))) else: radial_comp_data = np.add(nData * np.cos(az_rad), eData * np.sin(az_rad)) radial_trace = obspy.Trace(data=radial_comp_data, header=statsDict) hvsr_data['stream'].append(radial_trace) # Verbose printing if verbose and not isinstance(hvsr_data, HVSRBatch): dataINStr = hvsr_data.stream.__str__().split('\n') for line in dataINStr: print('\t\t', line) if show_az_plot: hvsr_data['Azimuth_Fig'] = plot_azimuth(hvsr_data=hvsr_data, **plot_azimuth_kwargs) hvsr_data['processing_status']['calculate_azimuths_status'] = True hvsr_data = sprit_utils._check_processing_status(hvsr_data, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_dataFunction to calculate azimuthal horizontal component at specified angle(s). Adds each new horizontal component as a radial component to obspy.Stream object at hvsr_data['stream']
Parameters
hvsr_data:HVSRData- Input HVSR data
azimuth_angle:int, default=10- If
azimuth_type='multiple', this is the angular step (in unitazimuth_unit) of each of the azimuthal measurements. Ifazimuth_type='single'this is the angle (in unitazimuth_unit) of the single calculated azimuthal measruement. By default 10. azimuth_type:str, default='multiple'- What type of azimuthal measurement to make, by default 'multiple'. If 'multiple' (or {'multi', 'mult', 'm'}), will take a measurement at each angular step of azimuth_angle of unit azimuth_unit. If 'single' (or {'sing', 's'}), will take a single azimuthal measurement at angle specified in azimuth_angle.
azimuth_unit:str, default='degrees'- Angular unit used to specify
azimuth_angleparameter. By default 'degrees'. If 'degrees' (or {'deg', 'd'}), will use degrees. If 'radians' (or {'rad', 'r'}), will use radians. show_az_plot:bool, default=False- Whether to show azimuthal plot, by default False.
verbose:bool, default=False- Whether to print terminal output, by default False
Returns
HVSRData- Updated HVSRData object specified in hvsr_data with hvsr_data['stream'] attribute containing additional components (EHR-***), with *** being zero-padded (3 digits) azimuth angle in degrees.
def check_instance(init)-
Expand source code
def check_instance(init): def wrapper(self, *args, **kwargs): # Check if the first argument is an instance of self.__class__ if args and isinstance(args[0], self.__class__): # Copy its attributes to self self.__dict__.update(args[0].__dict__) else: # Call the original __init__ method init(self, *args, **kwargs) return wrapper def check_peaks(hvsr_data,
hvsr_band=[0.5, 40],
peak_selection='max',
peak_freq_range=[0.5, 40],
azimuth='HV',
verbose=False)-
Expand source code
def check_peaks(hvsr_data, hvsr_band=DEFAULT_BAND, peak_selection='max', peak_freq_range=DEFAULT_BAND, azimuth='HV', verbose=False): """Function to run tests on HVSR peaks to find best one and see if it passes SESAME quality checks Parameters ---------- hvsr_data : dict Dictionary containing all the calculated information about the HVSR data (i.e., hvsr_out returned from process_hvsr) hvsr_band : tuple or list, default=[0.1, 50] 2-item tuple or list with lower and upper limit of frequencies to analyze peak_selection : str or numeric, default='max' How to select the "best" peak used in the analysis. For peak_selection="max" (default value), the highest peak within peak_freq_range is used. For peak_selection='scored', an algorithm is used to select the peak based in part on which peak passes the most SESAME criteria. If a numeric value is used (e.g., int or float), this should be a frequency value to manually select as the peak of interest. peak_freq_range : tuple or list, default=[0.1, 50]; The frequency range within which to check for peaks. If there is an HVSR curve with multiple peaks, this allows the full range of data to be processed while limiting peak picks to likely range. verbose : bool, default=False Whether to print results and inputs to terminal. Returns ------- hvsr_data : HVSRData or HVSRBatch object Object containing previous input data, plus information about peak tests """ orig_args = locals().copy() # Get the initial arguments # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'check_peaks' in hvsr_data['processing_parameters'].keys(): update_msg = [] for k, v in hvsr_data['processing_parameters']['check_peaks'].items(): defaultVDict = dict(zip(inspect.getfullargspec(check_peaks).args[1:], inspect.getfullargspec(check_peaks).defaults)) # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v hvsr_band = orig_args['hvsr_band'] peak_selection = orig_args['peak_selection'] peak_freq_range = orig_args['peak_freq_range'] verbose = orig_args['verbose'] #if (verbose and 'input_params' not in hvsr_data.keys()) or (verbose and not hvsr_data['batch']): # if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']: # pass # else: if verbose: print('\nChecking peaks in the H/V Curve (check_peaks())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key == 'hvsr_data': pass else: print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in hvsr_data.keys() and 'check_peaks' in hvsr_data['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() # First, divide up for batch or not if isinstance(hvsr_data, HVSRBatch): if verbose: print('\t Running in batch mode') #If running batch, we'll loop through each site for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "params" variable for each site if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_data[site_name] = __check_peaks_batch(**args) #Call another function, that lets us run this function again except: if verbose: print(f"\t{site_name}: check_peaks() unsuccessful. Peaks not checked.") else: warnings.warn(f"\t{site_name}: check_peaks() unsuccessful. Peaks not checked.", RuntimeWarning) hvsr_data = HVSRBatch(hvsr_data, df_as_read=hvsr_data.input_df) else: HVColIDList = ['_'.join(col_name.split('_')[2:]) for col_name in hvsr_data['hvsr_windows_df'].columns if col_name.startswith('HV_Curves') and 'Log' not in col_name] HVColIDList[0] = 'HV' if hvsr_data['processing_status']['overall_status']: if not hvsr_band: hvsr_band = DEFAULT_BAND hvsr_data['hvsr_band'] = hvsr_band anyK = list(hvsr_data['x_freqs'].keys())[0] hvsr_data['PeakReport'] = {} hvsr_data['BestPeak'] = {} for i, col_id in enumerate(HVColIDList): x = hvsr_data['x_freqs'][anyK] # Consistent for all curves if col_id == 'HV': y = hvsr_data['hvsr_curve'] # Calculated based on "Use" column else: y = hvsr_data['hvsr_az'][col_id] scorelist = ['score', 'scored', 'best', 's'] maxlist = ['maximum', 'max', 'highest', 'm'] # Convert peak_selection to numeric, get index of nearest value as list item for __init_peaks() try: peak_val = float(peak_selection) index_list = [np.argmin(np.abs(x - peak_val))] except Exception as e: # If score method is being used, get index list for __init_peaks() if peak_selection in scorelist: index_list = hvsr_data['hvsr_peak_indices'][col_id] #Calculated based on hvsr_curve else:# str(peak_selection).lower() in maxlist: #Get max index as item in list for __init_peaks() startInd = np.argmin(np.abs(x - peak_freq_range[0])) endInd = np.argmin(np.abs(x - peak_freq_range[1])) if startInd > endInd: holder = startInd startInd = endInd endInd = holder subArrayMax = np.argmax(y[startInd:endInd]) # If max val is in subarray, this will be the same as the max of curve # Otherwise, it will be the index of the value that is max within peak_freq_range index_list = [subArrayMax+startInd] hvsrp = hvsr_data['hvsrp'][col_id] # Calculated based on "Use" column hvsrm = hvsr_data['hvsrm'][col_id] # Calculated based on "Use" column hvsrPeaks = hvsr_data['hvsr_windows_df'][hvsr_data['hvsr_windows_df']['Use']]['CurvesPeakIndices_'+col_id] hvsr_log_std = hvsr_data['hvsr_log_std'][col_id] peak_freq_range = hvsr_data['peak_freq_range'] # Do for hvsr peak = __init_peaks(x, y, index_list, hvsr_band, peak_freq_range, _min_peak_amp=0.5) peak = __check_curve_reliability(hvsr_data, peak, col_id) peak = __check_clarity(x, y, peak, do_rank=True) # Do for hvsrp # Find the relative extrema of hvsrp (hvsr + 1 standard deviation) if not np.isnan(np.sum(hvsrp)): index_p = __find_peaks(hvsrp) else: index_p = list() peakp = __init_peaks(x, hvsrp, index_p, hvsr_band, peak_freq_range, _min_peak_amp=1) peakp = __check_clarity(x, hvsrp, peakp, do_rank=True) # Do for hvsrm # Find the relative extrema of hvsrm (hvsr - 1 standard deviation) if not np.isnan(np.sum(hvsrm)): index_m = __find_peaks(hvsrm) else: index_m = list() peakm = __init_peaks(x, hvsrm, index_m, hvsr_band, peak_freq_range, _min_peak_amp=0) peakm = __check_clarity(x, hvsrm, peakm, do_rank=True) # Get standard deviation of time peaks stdf = __get_stdf(x, index_list, hvsrPeaks) peak = __check_freq_stability(peak, peakm, peakp) peak = __check_stability(stdf, peak, hvsr_log_std, rank=True) hvsr_data['PeakReport'][col_id] = peak #Iterate through peaks and # Get the BestPeak based on the peak score # Calculate whether each peak passes enough tests curveTests = ['WinLen','SigCycles', 'LowCurveStD'] peakTests = ['ProminenceLow', 'ProminenceHi', 'AmpClarity', 'FreqStability', 'LowStDev_Freq', 'LowStDev_Amp'] bestPeakScore = 0 for p in hvsr_data['PeakReport'][col_id]: # Get BestPeak if p['Score'] > bestPeakScore: bestPeakScore = p['Score'] bestPeak = p # Calculate if peak passes criteria cTestsPass = 0 pTestsPass = 0 for testName in p['PassList'].keys(): if testName in curveTests: if p['PassList'][testName]: cTestsPass += 1 elif testName in peakTests: if p['PassList'][testName]: pTestsPass += 1 if cTestsPass == 3 and pTestsPass >= 5: p['PeakPasses'] = True else: p['PeakPasses'] = False # Designate BestPeak in output dict if len(hvsr_data['PeakReport'][col_id]) == 0: bestPeak = {} print(f"No Best Peak identified for {hvsr_data['site']} (azimuth {col_id})") hvsr_data['BestPeak'][col_id] = bestPeak else: for i, col_id in enumerate(HVColIDList): if hasattr(hvsr_data, 'BestPeak'): hvsr_data['BestPeak'][col_id] = {} else: print(f"Processing Errors: No Best Peak identified for {hvsr_data['site']} (azimuth {col_id})") try: hvsr_data.plot() except: pass hvsr_data['processing_parameters']['check_peaks'] = {} exclude_params_list = ['hvsr_data'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_data['processing_parameters']['check_peaks'][key] = value return hvsr_dataFunction to run tests on HVSR peaks to find best one and see if it passes SESAME quality checks
Parameters
hvsr_data:dict- Dictionary containing all the calculated information about the HVSR data (i.e., hvsr_out returned from process_hvsr)
hvsr_band:tupleorlist, default=[0.1, 50]- 2-item tuple or list with lower and upper limit of frequencies to analyze
peak_selection:strornumeric, default='max'- How to select the "best" peak used in the analysis. For peak_selection="max" (default value), the highest peak within peak_freq_range is used. For peak_selection='scored', an algorithm is used to select the peak based in part on which peak passes the most SESAME criteria. If a numeric value is used (e.g., int or float), this should be a frequency value to manually select as the peak of interest.
peak_freq_range:tupleorlist, default=[0.1, 50];- The frequency range within which to check for peaks. If there is an HVSR curve with multiple peaks, this allows the full range of data to be processed while limiting peak picks to likely range.
verbose:bool, default=False- Whether to print results and inputs to terminal.
Returns
hvsr_data : HVSRDataorHVSRBatch object- Object containing previous input data, plus information about peak tests
def export_data(hvsr_data,
data_export_path,
data_export_format='mseed',
starttime=None,
endtime=None,
tzone=None,
export_edited_stream=False,
site=None,
project=None,
verbose=False,
**kwargs)-
Expand source code
def export_data(hvsr_data, data_export_path, data_export_format='mseed', starttime=None, endtime=None, tzone=None, export_edited_stream=False, site=None, project=None, verbose=False, **kwargs): """Export data stream to file. This uses the obspy.Stream.write() method on the hvsr_data['stream'] object, but the stream can first be trimmed using starttime, endtime, and tzone. Parameters ---------- hvsr_data : HVSRData, HVSRBatch, obspy.Stream, obspy.Trace Input stream or HVSR object data_export_path : pathlike-object Filepath at which to format data. If directory (recommended), filename will be generated automatically. data_export_format : str, optional Format of data, should be file format supported by obspy.write(), by default 'mseed' starttime : str, UTCDateTime, or datetime.datetime, optional Starttime of stream, if trimming is desired, by default None endtime : str, UTCDateTime, or datetime.datetime, optional Endtime of stream, if trimming is desired, by default None tzone : str, zoneinfo.Zoneinfo, optional String readable by zoneinfo.Zoneinfo() or Zoneinfo object, by default None export_edited_stream : bool, optional Whether to export the raw stream ('stream' property; if False) or edited stream ('stream_edited' property; if True) in HVSRData object, by default False. site : str, optional Site name, to be used in filename generation, by default None project : str, optional Project or county name, to be used in filename generation, by default None verbose : bool, optional Whether to print information to terminal, by default False Returns ------- obspy.Stream Stream object exported Raises ------ TypeError hvsr_data must be of type HVSRData, HVSRBatch, obspy.Stream, or obspy.Trace """ # Extract stream for export if isinstance(hvsr_data, HVSRBatch): for site in hvsr_data: export_data(hvsr_data[site], data_export_path=data_export_path, data_export_format=data_export_format, starttime=starttime, endtime=endtime, verbose=verbose, **kwargs) return elif isinstance(hvsr_data, (obspy.Stream, obspy.Trace)): if isinstance(hvsr_data, obspy.Stream): outputStream = hvsr_data.copy() else: outputStream = obspy.Stream([hvsr_data]) else: # Assume data is in hvsr_data if not isinstance(hvsr_data, HVSRData): raise TypeError(f"The sprit.export_data() parameter hvsr_data must be of type HVSRData, HVSRBatch, obspy.Stream, or obspy.Trace, not {type(hvsr_data)}") if export_edited_stream and hasattr(hvsr_data, 'stream_edited'): outputStream = hvsr_data['stream_edited'].copy() else: outputStream = hvsr_data['stream'].copy() # Get starttime in obspy.UTCDateTime format if starttime is not None: if type(starttime) == str: sTimeDT = sprit_utils._format_time(starttime, tzone=tzone) acqDate = outputStream[0].stats.starttime.date sTimeDT.replace(year=acqDate.year, month=acqDate.month, day=acqDate.day) sTimeUTC = obspy.UTCDateTime(sTimeDT) elif isinstance(starttime, datetime.datetime): if tzone is not None: starttime = starttime.replace(tzinfo=tzone) sTimeUTC = obspy.UTCDateTime(starttime.astimezone(datetime.timezone.utc)) else: sTimeUTC = obspy.UTCDateTime(starttime) else: sTimeUTC = outputStream[0].stats.starttime # Get endtime in obspy.UTCDateTime format if endtime is not None: if type(endtime) == str: eTimeDT = sprit_utils._format_time(endtime, tzone=tzone) acqDate = outputStream[-1].stats.endtime.date eTimeDT.replace(year=acqDate.year, month=acqDate.month, day=acqDate.day) eTimeUTC = obspy.UTCDateTime(eTimeDT) elif isinstance(endtime, datetime.datetime): if tzone is not None: endtime = endtime.replace(tzinfo=tzone) eTimeUTC = obspy.UTCDateTime(endtime.astimezone(datetime.timezone.utc)) else: eTimeUTC = obspy.UTCDateTime(endtime) else: eTimeUTC = outputStream[-1].stats.endtime # Build filepath siteName = site if site is None: siteName = "HVSRSite" projectName = project if project is None: projectName = "" if projectName != "" and len(projectName)>0 and projectName[-1] != '-': projectName += "-" sDateStr = outputStream[0].stats.starttime.strftime("%Y%m%d") sTimeStr = outputStream[0].stats.starttime.strftime("%H%M") staStr = outputStream[0].stats.station deFormat = str(data_export_format).upper() if data_export_format[0] == '.': deFormat = deFormat[1:] dePath = pathlib.Path(data_export_path) autoFname = f"{siteName}_Stream_{projectName}{sDateStr}-{sTimeStr}-{staStr}_{datetime.date.today()}.{deFormat}" if dePath.is_dir(): if not dePath.exists(): dePath.mkdir(parents=True) outfPath = dePath.joinpath(autoFname) elif dePath.is_file(): outfPath = dePath # Trim stream as needed if starttime is None and endtime is None: pass else: isMasked = False doTrim = False for tr in outputStream: if isinstance(tr.data, np.ma.masked_array): isMasked = True if sTimeUTC > tr.stats.endtime or eTimeUTC < tr.stats.starttime: doTrim = True if isMasked: outputStream = outputStream.split() if doTrim: if verbose: print(f"\t Trimming data to {sTimeUTC} and {eTimeUTC}\n\t\t Stream starttime: {outputStream[0].stats.starttime}\n\t\t Stream endtime: {outputStream[0].stats.endtime}") outputStream.trim(starttime=sTimeUTC, endtime=eTimeUTC) outputStream.merge(method=1) # Take care of masked arrays for writing purposes if 'fill_value' in kwargs.keys(): for tr in outputStream: if isinstance(tr.data, np.ma.masked_array): tr.data = tr.data.filled(kwargs['fill_value']) else: outputStream = outputStream.split() outputStream.write(filename=outfPath.as_posix()) if verbose: print('Stream has been written to ' + outfPath.as_posix()) return outputStreamExport data stream to file. This uses the obspy.Stream.write() method on the hvsr_data['stream'] object, but the stream can first be trimmed using starttime, endtime, and tzone.
Parameters
hvsr_data:HVSRData, HVSRBatch, obspy.Stream, obspy.Trace- Input stream or HVSR object
data_export_path:pathlike-object- Filepath at which to format data. If directory (recommended), filename will be generated automatically.
data_export_format:str, optional- Format of data, should be file format supported by obspy.write(), by default 'mseed'
starttime:str, UTCDateTime,ordatetime.datetime, optional- Starttime of stream, if trimming is desired, by default None
endtime:str, UTCDateTime,ordatetime.datetime, optional- Endtime of stream, if trimming is desired, by default None
tzone:str, zoneinfo.Zoneinfo, optional- String readable by zoneinfo.Zoneinfo() or Zoneinfo object, by default None
export_edited_stream:bool, optional- Whether to export the raw stream ('stream' property; if False) or edited stream ('stream_edited' property; if True) in HVSRData object, by default False.
site:str, optional- Site name, to be used in filename generation, by default None
project:str, optional- Project or county name, to be used in filename generation, by default None
verbose:bool, optional- Whether to print information to terminal, by default False
Returns
obspy.Stream- Stream object exported
Raises
TypeError- hvsr_data must be of type HVSRData, HVSRBatch, obspy.Stream, or obspy.Trace
def export_hvsr(hvsr_data,
hvsr_export_path=None,
ext='hvsr',
export_type='gzip',
export_plots=False,
verbose=False)-
Expand source code
def export_hvsr(hvsr_data, hvsr_export_path=None, ext='hvsr', export_type='gzip', export_plots=False, verbose=False): """Export data into pickle format that can be read back in using import_data(). Intended so data does not need to be processed each time it needs to be used. By default, first, export_hvsr serializes the HVSRData object(s) using pickle.dumps(). Then, to save space, it writes that to a gzip file. Default extension is .hvsr no matter the format, though this can be set with `ext` parameter. Parameters ---------- hvsr_data : HVSRData or HVSRBatch Data to be exported hvsr_export_path : str or filepath object, default = None String or filepath object to be read by pathlib.Path() and/or a with open(hvsr_export_path, 'wb') statement. If None, defaults to input input_data directory, by default None ext : str, default = 'hvsr' Filepath extension to use for data file, by default 'hvsr'. This will be the extension no matter the export_type export_type : str, default = 'gzip' Export type to use. If `export_type` is 'pickle', will just save to disk using pickle.dump. Otherwise, saves a pickle-serialized object to a gzip file (with a .hvsr extension in both cases, by default). verbose : bool, default=False Whether to print information about export. A confirmation message is printed no matter what. """ def _hvsr_export(_hvsr_data=hvsr_data, _export_path=hvsr_export_path, _ext=ext): fname = f"{_hvsr_data['site']}_HVSRData_{_hvsr_data['hvsr_id']}_{datetime.date.today()}_pickled.{ext}" if _export_path is None or _export_path is True: _export_path = _hvsr_data['input_data'] _export_path = pathlib.Path(_export_path).with_name(fname) else: _export_path = pathlib.Path(_export_path) if _export_path.is_dir(): _export_path = _export_path.joinpath(fname) _export_path = str(_export_path) if export_type == 'pickle': with open(_export_path, 'wb') as f: pickle.dump(_hvsr_data, f) else: with gzip.open(_export_path, 'wb') as f: f.write(pickle.dumps(_hvsr_data)) if verbose: print('EXPORT COMPLETE') print(f"Processed data exported as pickled data to: {_export_path} [~{round(float(pathlib.Path(_export_path).stat().st_size)/2**20,1)} Mb]") hvData = hvsr_data hvData = hvsr_data.copy() if export_plots is False: for pk in PLOT_KEYS: if hasattr(hvData, pk): delattr(hvData, pk) if isinstance(hvData, HVSRBatch): for sitename in hvData.keys(): _hvsr_export(_hvsr_data=hvData[sitename], _export_path=hvsr_export_path, _ext=ext) elif isinstance(hvData, HVSRData): _hvsr_export(_hvsr_data=hvData, _export_path=hvsr_export_path, _ext=ext) else: print("Error in data export. Data must be either of type sprit.HVSRData or sprit.HVSRBatch") returnExport data into pickle format that can be read back in using import_data(). Intended so data does not need to be processed each time it needs to be used. By default, first, export_hvsr serializes the HVSRData object(s) using pickle.dumps(). Then, to save space, it writes that to a gzip file. Default extension is .hvsr no matter the format, though this can be set with
extparameter.Parameters
hvsr_data:HVSRDataorHVSRBatch- Data to be exported
hvsr_export_path:strorfilepath object, default= None- String or filepath object to be read by pathlib.Path() and/or a with open(hvsr_export_path, 'wb') statement. If None, defaults to input input_data directory, by default None
ext:str, default= 'hvsr'- Filepath extension to use for data file, by default 'hvsr'. This will be the extension no matter the export_type
export_type:str, default= 'gzip'- Export type to use. If
export_typeis 'pickle', will just save to disk using pickle.dump. Otherwise, saves a pickle-serialized object to a gzip file (with a .hvsr extension in both cases, by default). verbose:bool, default=False- Whether to print information about export. A confirmation message is printed no matter what.
def export_report(hvsr_results,
report_export_path=None,
report_export_format=['pdf'],
azimuth='HV',
csv_handling='rename',
show_report=True,
verbose=False)-
Expand source code
def export_report(hvsr_results, report_export_path=None, report_export_format=['pdf'], azimuth='HV', csv_handling='rename', show_report=True, verbose=False): """Function to export reports to disk. Exportable formats for report_export_format include: * 'table': saves a pandas DataFrame as a csv) * 'plot': saves the matplotlib or plotly plot figure (depending on what is designated via plot_engine) as an image (png by default) * 'print': saves the print report as a .txt file * 'html': saves the html report as a .html file * 'pdf': saves the pdf report as a .pdf file Parameters ---------- hvsr_results : HVSRData object HVSRData object containing the HVSR data report_export_path : path-like object, optional The path to where the report should be exported. If this is None (default), this is written to the home directory. If this is a True, uses the same directory as the input data, but generates a filename. If this is a directory, generates a filename. If filename is specified and the extension does not match the report type, the extension is adjusted. Otherwise, this is the output file or , by default None csv_handling : {'rename', 'append', 'overwrite', 'keep'}, optional If table is the report type, this can prevent overwriting data, by default 'rename'. * "rename" (or "keep"): renames the new file to prevent overwrite, appends a digit to the end of filename * "append": appends the new data to the existing file * "overwrite": overwrites the existing file report_export_format : str or list, optional The format (or a list of formats) to export the report, by default ['pdf']. show_report : bool, optional Whether to show the designated reports that were chosen for export, by default True verbose : bool, optional Whether to print progress and other information to terminal, by default False Returns ------- HVSRData An HVSRData object that is the same as hvsr_results, but with any additionally generated reports. """ if type(report_export_format) is str: report_export_format = [report_export_format] for ref in report_export_format: if report_export_path is None: print('The export_report(report_export_path) parameter was not specified.') print(f'The report will be saved the home directory: {pathlib.Path.home()}') if ref == 'table': ext = '.csv' elif ref == 'plot': ext = '.png' elif ref == 'print': ext = '.txt' elif ref == 'html': ext = '.html' else: ref == 'pdf' ext = '.pdf' sitename = hvsr_results['input_params']['site'] fname = f"{sitename}_REPORT_{hvsr_results['hvsr_id']}_{datetime.date.today()}{ext}" fname = fname.replace(':', '') # Initialize output as file in home directory (if not updated) outFile = pathlib.Path().home().joinpath(fname) if report_export_path is True or report_export_path is None: # Check so we don't write in sample directory if pathlib.Path(hvsr_results['input_params']['input_data']) in sampleFileKeyMap.values(): if pathlib.Path(os.getcwd()) in sampleFileKeyMap.values(): #Just in case current working directory is also sample directory inFile = pathlib.Path.home() #Use the path to user's home if all else fails else: inFile = pathlib.Path(os.getcwd()) else: inFile = pathlib.Path(hvsr_results['input_params']['input_data']) if inFile.is_dir(): outFile = inFile.joinpath(fname) else: outFile = inFile.with_name(fname) else: if report_export_path is False: pass elif pathlib.Path(report_export_path).is_dir(): outFile = pathlib.Path(report_export_path).joinpath(fname) else: outFile = pathlib.Path(report_export_path) if ref == 'table': if not hasattr(hvsr_results, 'Table_Report'): hvsr_results = _generate_table_report(hvsr_results, azimuth=azimuth, show_table_report=show_report, verbose=verbose) reportDF = hvsr_results['Table_Report'] # Check if file already exists, and handle as specified in csv_handling if outFile.exists(): existFile = pd.read_csv(outFile) if csv_handling.lower() == 'append': # Append report to existing report as new row reportDF = pd.concat([existFile, reportDF], ignore_index=True, join='inner') elif csv_handling.lower() == 'overwrite': # Overwrite existing report file pass else: # csv_handling.lower() in ['keep', 'rename', or other]: # Rename new report so as not to modify existing report (default handling) if outFile.stem[-3] == '_' and outFile.stem[-2:].isdigit(): fileDigit = int(outFile.stem[-2:]) + 1 else: fileDigit = 1 fileDigit = str(fileDigit).zfill(2) outFile = outFile.with_stem(outFile.stem + '_' + fileDigit) # Export to csv using pandas to_csv method try: print(f'\nSaving table report to: {outFile}') reportDF.to_csv(outFile, index_label='ID') except: warnings.warn("Table report not exported. \n\tDataframe to be exported as csv has been saved in hvsr_results['BestPeak']['Report']['Table_Report]", category=RuntimeWarning) if show_report or verbose: print('\nTable Report:\n') maxColWidth = 13 print(' ', end='') for col in reportDF.columns: if len(str(col)) > maxColWidth: colStr = str(col)[:maxColWidth-3]+'...' else: colStr = str(col) print(colStr.ljust(maxColWidth), end=' ') print() #new line for c in range(len(reportDF.columns) * (maxColWidth+2)): if c % (maxColWidth+2) == 0: print('|', end='') else: print('-', end='') print('|') #new line print(' ', end='') #Small indent at start for row in reportDF.iterrows(): for col in row[1]: if len(str(col)) > maxColWidth: colStr = str(col)[:maxColWidth-3]+'...' else: colStr = str(col) print(colStr.ljust(maxColWidth), end=' ') print() elif ref == 'plot': if not hasattr(hvsr_results, 'Plot_Report'): fig = plot_hvsr(hvsr_results, return_fig=True) hvsr_results['BestPeak'][azimuth]['Report']['Plot_Report'] = hvsr_results['Plot_Report'] = fig if verbose: print(f'\nSaving plot to: {outFile}') plt.scf = fig plt.savefig(outFile) elif ref == 'print': if not hasattr(hvsr_results, "Print_Report") or hvsr_results['Print_Report'] is None: hvsr_results = _generate_print_report(hvsr_results, azimuth=azimuth, show_print_report=show_report, verbose=verbose) with open(outFile, 'w') as outF: outF.write(hvsr_results['Print_Report']) # Could write more details in the future if show_report or verbose: print(hvsr_results['Print_Report']) elif ref == "html": if not hasattr(hvsr_results, "HTML_Report") or hvsr_results['HTML_Report'] is None: hvsr_results = _generate_html_report(hvsr_results) with open(outFile, 'w') as outF: outF.write(hvsr_results['HTML_Report']) elif ref == "pdf": hvsr_results = _generate_pdf_report(hvsr_results, pdf_report_filepath=report_export_path, show_pdf_report=show_report, verbose=verbose) return hvsr_resultsFunction to export reports to disk. Exportable formats for report_export_format include: * 'table': saves a pandas DataFrame as a csv) * 'plot': saves the matplotlib or plotly plot figure (depending on what is designated via plot_engine) as an image (png by default) * 'print': saves the print report as a .txt file * 'html': saves the html report as a .html file * 'pdf': saves the pdf report as a .pdf file
Parameters
hvsr_results:HVSRData object- HVSRData object containing the HVSR data
report_export_path:path-like object, optional- The path to where the report should be exported. If this is None (default), this is written to the home directory. If this is a True, uses the same directory as the input data, but generates a filename. If this is a directory, generates a filename. If filename is specified and the extension does not match the report type, the extension is adjusted. Otherwise, this is the output file or , by default None
csv_handling:{'rename', 'append', 'overwrite', 'keep'}, optional- If table is the report type, this can prevent overwriting data, by default 'rename'. * "rename" (or "keep"): renames the new file to prevent overwrite, appends a digit to the end of filename * "append": appends the new data to the existing file * "overwrite": overwrites the existing file
report_export_format:strorlist, optional- The format (or a list of formats) to export the report, by default ['pdf'].
show_report:bool, optional- Whether to show the designated reports that were chosen for export, by default True
verbose:bool, optional- Whether to print progress and other information to terminal, by default False
Returns
HVSRData- An HVSRData object that is the same as hvsr_results, but with any additionally generated reports.
def export_settings(hvsr_data,
export_settings_path='default',
export_settings_type='all',
include_location=False,
verbose=True)-
Expand source code
def export_settings(hvsr_data, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True): """Save processing settings to json file. Parameters ---------- export_settings_path : str, default="default" Where to save the json file(s) containing the settings, by default 'default'. If "default," will save to sprit package resources. Otherwise, set a filepath location you would like for it to be saved to. If 'all' is selected, a directory should be supplied. Otherwise, it will save in the directory of the provided file, if it exists. Otherwise, defaults to the home directory. export_settings_type : str, {'all', 'instrument', 'processing'} What kind of settings to save. If 'all', saves all possible types in their respective json files. If 'instrument', save the instrument settings to their respective file. If 'processing', saves the processing settings to their respective file. By default 'all' include_location : bool, default=False, input CRS Whether to include the location parametersin the exported settings document.This includes xcoord, ycoord, elevation, elev_unit, and input_crs verbose : bool, default=True Whether to print outputs and information to the terminal """ fnameDict = {} fnameDict['instrument'] = "instrument_settings.json" fnameDict['processing'] = "processing_settings.json" if export_settings_path == 'default' or export_settings_path is True: settingsPath = RESOURCE_DIR.joinpath('settings') else: export_settings_path = pathlib.Path(export_settings_path) if not export_settings_path.exists(): if not export_settings_path.parent.exists(): print(f'The provided value for export_settings_path ({export_settings_path}) does not exist. Saving settings to the home directory: {pathlib.Path.home()}') settingsPath = pathlib.Path.home() else: settingsPath = export_settings_path.parent if export_settings_path.is_dir(): settingsPath = export_settings_path elif export_settings_path.is_file(): settingsPath = export_settings_path.parent fnameDict['instrument'] = export_settings_path.name+"_instrumentSettings.json" fnameDict['processing'] = export_settings_path.name+"_processingSettings.json" #Get final filepaths instSetFPath = settingsPath.joinpath(fnameDict['instrument']) procSetFPath = settingsPath.joinpath(fnameDict['processing']) #Get settings values instKeys = ["instrument", "net", "sta", "loc", "cha", "depth", "metadata", "hvsr_band"] inst_location_keys = ['xcoord', 'ycoord', 'elevation', 'elev_unit', 'input_crs'] procFuncs = [fetch_data, remove_noise, generate_psds, process_hvsr, check_peaks, get_report] instrument_settings_dict = {} processing_settings_dict = {} for k in instKeys: if isinstance(hvsr_data[k], pathlib.PurePath): #For those that are paths and cannot be serialized instrument_settings_dict[k] = hvsr_data[k].as_posix() else: instrument_settings_dict[k] = hvsr_data[k] if include_location: for k in inst_location_keys: if isinstance(hvsr_data[k], pathlib.PurePath): #For those that are paths and cannot be serialized instrument_settings_dict[k] = hvsr_data[k].as_posix() else: instrument_settings_dict[k] = hvsr_data[k] for func in procFuncs: funcName = func.__name__ processing_settings_dict[funcName] = {} for arg in hvsr_data['processing_parameters'][funcName]: if isinstance(hvsr_data['processing_parameters'][funcName][arg], (HVSRBatch, HVSRData)): pass else: processing_settings_dict[funcName][arg] = hvsr_data['processing_parameters'][funcName][arg] if verbose: print("Exporting Settings") #Save settings files if export_settings_type.lower()=='instrument' or export_settings_type.lower()=='all': try: with open(instSetFPath.with_suffix('.inst').as_posix(), 'w') as instSetF: jsonString = json.dumps(instrument_settings_dict, indent=2) #Format output for readability jsonString = jsonString.replace('\n ', ' ') jsonString = jsonString.replace('[ ', '[') jsonString = jsonString.replace('\n ]', ']') #Export instSetF.write(jsonString) except: instSetFPath = pathlib.Path.home().joinpath(instSetFPath.name) with open(instSetFPath.with_suffix('.inst').as_posix(), 'w') as instSetF: jsonString = json.dumps(instrument_settings_dict, indent=2) #Format output for readability jsonString = jsonString.replace('\n ', ' ') jsonString = jsonString.replace('[ ', '[') jsonString = jsonString.replace('\n ]', ']') #Export instSetF.write(jsonString) if verbose: print(f"Instrument settings exported to {instSetFPath}") print(f"{jsonString}") print() if export_settings_type.lower()=='processing' or export_settings_type.lower()=='all': try: with open(procSetFPath.with_suffix('.proc').as_posix(), 'w') as procSetF: jsonString = json.dumps(processing_settings_dict, indent=2) #Format output for readability jsonString = jsonString.replace('\n ', ' ') jsonString = jsonString.replace('[ ', '[') jsonString = jsonString.replace('\n ]', ']') jsonString = jsonString.replace('\n },','\n\t\t},\n') jsonString = jsonString.replace('{ "', '\n\t\t{\n\t\t"') jsonString = jsonString.replace(', "', ',\n\t\t"') jsonString = jsonString.replace('\n }', '\n\t\t}') jsonString = jsonString.replace(': {', ':\n\t\t\t{') #Export procSetF.write(jsonString) except: procSetFPath = pathlib.Path.home().joinpath(procSetFPath.name) with open(procSetFPath.with_suffix('.proc').as_posix(), 'w') as procSetF: jsonString = json.dumps(processing_settings_dict, indent=2) #Format output for readability jsonString = jsonString.replace('\n ', ' ') jsonString = jsonString.replace('[ ', '[') jsonString = jsonString.replace('\n ]', ']') jsonString = jsonString.replace('\n },','\n\t\t},\n') jsonString = jsonString.replace('{ "', '\n\t\t{\n\t\t"') jsonString = jsonString.replace(', "', ',\n\t\t"') jsonString = jsonString.replace('\n }', '\n\t\t}') jsonString = jsonString.replace(': {', ':\n\t\t\t{') #Export procSetF.write(jsonString) if verbose: print(f"Processing settings exported to {procSetFPath}") print(f"{jsonString}") print()Save processing settings to json file.
Parameters
export_settings_path:str, default="default"- Where to save the json file(s) containing the settings, by default 'default'. If "default," will save to sprit package resources. Otherwise, set a filepath location you would like for it to be saved to. If 'all' is selected, a directory should be supplied. Otherwise, it will save in the directory of the provided file, if it exists. Otherwise, defaults to the home directory.
export_settings_type:str, {'all', 'instrument', 'processing'}- What kind of settings to save. If 'all', saves all possible types in their respective json files. If 'instrument', save the instrument settings to their respective file. If 'processing', saves the processing settings to their respective file. By default 'all'
include_location:bool, default=False, input CRS- Whether to include the location parametersin the exported settings document.This includes xcoord, ycoord, elevation, elev_unit, and input_crs
verbose:bool, default=True- Whether to print outputs and information to the terminal
def fetch_data(params,
source='file',
data_export_path=None,
data_export_format='mseed',
detrend='spline',
detrend_options=2,
filter_type=None,
filter_options={},
update_metadata=True,
plot_input_stream=False,
plot_engine='matplotlib',
show_plot=True,
verbose=False,
**kwargs)-
Expand source code
def fetch_data(params, source='file', data_export_path=None, data_export_format='mseed', detrend='spline', detrend_options=2, filter_type=None, filter_options={}, update_metadata=True, plot_input_stream=False, plot_engine='matplotlib', show_plot=True, verbose=False, **kwargs): """Fetch ambient seismic data from a source to read into obspy stream. Parameters ---------- params : dict Dictionary containing all the necessary params to get data. Parameters defined using input_params() function. source : str, {'raw', 'dir', 'file', 'batch'} String indicating where/how data file was created. For example, if raw data, will need to find correct channels. 'raw' finds raspberry shake data, from raw output copied using scp directly from Raspberry Shake, either in folder or subfolders; 'dir' is used if the day's 3 component files (currently Raspberry Shake supported only) are all 3 contained in a directory by themselves. 'file' is used if the params['input_data'] specified in input_params() is the direct filepath to a single file to be read directly into an obspy stream. 'batch' is used to read a list or specified set of seismic files. Most commonly, a csv file can be read in with all the parameters. Each row in the csv is a separate file. Columns can be arranged by parameter. data_export_path : None or str or pathlib obj, default=None If None (or False), data is not trimmed in this function. Otherwise, this is the directory to save trimmed and exported data. data_export_format: str='mseed' If data_export_path is not None, this is the format in which to save the data detrend : str or bool, default='spline' If False, data is not detrended. Otherwise, this should be a string accepted by the type parameter of the obspy.core.trace.Trace.detrend method: https://docs.obspy.org/packages/autogen/obspy.core.trace.Trace.detrend.html detrend_options : int, default=2 If detrend parameter is 'spline' or 'polynomial', this is passed directly to the order parameter of obspy.core.trace.Trace.detrend method. filter_type : None, str Type of filter to use on raw data. This should either be None or any of {'bandpass', 'bandstop', 'lowpass', 'highpass', 'lowpass_cheby_2', 'lowpass_fir', 'remez_fir'}. This passes `filter_type` to the `type` parameter and `**filter_options` to the `**options` parameter of the obspy.Stream filter() method. See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html If None, no filtering is done on the input seismic data. filter_options : dict Dictionary that will be unpacked into the `**options` parameter of the filter() method of the obspy.Stream class. This should fit the parameters of whichever filter type is specifed by filter_type. Example options for the 'bandpass' filter_type might be: `filter_options={'freqmin': 0.1, 'freqmax':50, 'df':100, 'corners':4, 'zerophase':True}`. See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html update_metadata : bool, default=True Whether to update the metadata file, used primarily with Raspberry Shake data which uses a generic inventory file. plot_input_stream : bool, default=False Whether to plot the raw input stream. This plot includes a spectrogram (Z component) and the raw (with decimation for speed) plots of each component signal. plot_engine : str, default='matplotlib' Which plotting library/engine to use for plotting the Input stream. Options are 'matplotlib', 'plotly', or 'obspy' (not case sensitive). verbose : bool, default=False Whether to print outputs and inputs to the terminal **kwargs Keywords arguments, primarily for 'batch' and 'dir' sources Returns ------- params : HVSRData or HVSRBatch object Same as params parameter, but with an additional "stream" attribute with an obspy data stream with 3 traces: Z (vertical), N (North-south), and E (East-west) """ # Get intput paramaters orig_args = locals().copy() start_time = datetime.datetime.now() # Keep track of any updates made to raw input along the way update_msg = [] # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in params.keys(): if 'fetch_data' in params['processing_parameters'].keys(): defaultVDict = dict(zip(inspect.getfullargspec(fetch_data).args[1:], inspect.getfullargspec(fetch_data).defaults)) defaultVDict['kwargs'] = kwargs for k, v in params['processing_parameters']['fetch_data'].items(): # Manual input to function overrides the imported parameter values if k != 'params' and k in orig_args.keys() and orig_args[k]==defaultVDict[k]: update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v # Update local variables, in case of previously-specified parameters source = orig_args['source'].lower() data_export_path = orig_args['data_export_path'] data_export_format = orig_args['data_export_format'] detrend = orig_args['detrend'] detrend_options = orig_args['detrend_options'] filter_type = orig_args['filter_type'] filter_options = orig_args['filter_options'] update_metadata = orig_args['update_metadata'] plot_input_stream = orig_args['plot_input_stream'] plot_engine = orig_args['plot_engine'] verbose = orig_args['verbose'] kwargs = orig_args['kwargs'] # Print inputs for verbose setting if verbose: print('\nFetching data (fetch_data())') for key, value in orig_args.items(): if not isinstance(value, (HVSRData, HVSRBatch)): print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in params.keys() and 'fetch_data' in params['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() raspShakeInstNameList = ['raspberry shake', 'shake', 'raspberry', 'rs', 'rs3d', 'rasp. shake', 'raspshake'] trominoNameList = ['tromino', 'trom','tromino blue', 'tromino blu', 'tromino 3g', 'tromino 3g+', 'tr', 't'] # Check if data is from tromino, and adjust parameters accordingly if 'trc' in pathlib.Path(str(params['input_data'])).suffix: if verbose and hasattr(params, 'instrument') and params['instrument'].lower() not in trominoNameList: print(f"\t Data from tromino detected. Changing instrument from {params['instrument']} to 'Tromino'") if 'tromino' not in str(params['instrument']).lower(): params['instrument'] = 'Tromino' # Get metadata (inventory/response information) params = get_metadata(params, update_metadata=update_metadata, source=source, verbose=verbose) inv = params['inv'] date = params['acq_date'] # Cleanup for gui input if isinstance(params['input_data'], (obspy.Stream, obspy.Trace)): pass elif '}' in str(params['input_data']): # This is how tkinter gui data comes in params['input_data'] = params['input_data'].as_posix().replace('{', '') params['input_data'] = params['input_data'].split('}') # Make sure input_data is pointing to an actual file if isinstance(params['input_data'], list): for i, d in enumerate(params['input_data']): params['input_data'][i] = sprit_utils._checkifpath(str(d).strip(), sample_list=SAMPLE_LIST) dPath = params['input_data'] elif isinstance(params['input_data'], (obspy.Stream, obspy.Trace)): dPath = pathlib.Path() #params['input_data'] elif isinstance(params['input_data'], HVSRData): dPath = pathlib.Path(params['input_data']['input_data']) if not isinstance(params['input_data']['stream'], (obspy.Stream, obspy.Trace)): try: for k, v in params.items(): if isinstance(v, (obspy.Trace, obspy.Stream)): params['input_data']['stream'] = v elif pathlib.Path(str(v)).exists(): try: params['input_data']['stream'] = obspy.read(v) except Exception as e: pass except: raise RuntimeError(f'The params["input_data"] parameter of fetch_data() was determined to be an HVSRData object, but no data in the "stream" attribute.') else: if verbose: print('\tThe params["input_data"] argument is already an HVSRData obect.') print("\tChecking metadata then moving on.") else: dPath = sprit_utils._checkifpath(params['input_data'], sample_list=SAMPLE_LIST) inst = params['instrument'] # Need to put dates and times in right formats first if type(date) is datetime.datetime: doy = date.timetuple().tm_yday year = date.year elif type(date) is datetime.date: date = datetime.datetime.combine(date, datetime.time(hour=0, minute=0, second=0)) doy = date.timetuple().tm_yday year = date.year elif type(date) is tuple: if date[0]>366: raise ValueError('First item in date tuple must be day of year (0-366)', 0) elif date[1] > datetime.datetime.now().year: raise ValueError('Second item in date tuple should be year, but given item is in the future', 0) else: doy = date[0] year = date[1] elif type(date) is str: if '/' in date: dateSplit = date.split('/') elif '-' in date: dateSplit = date.split('-') else: dateSplit = date if int(dateSplit[0]) > 31: date = datetime.datetime(int(dateSplit[0]), int(dateSplit[1]), int(dateSplit[2])) doy = date.timetuple().tm_yday year = date.year elif int(dateSplit[0])<=12 and int(dateSplit[2]) > 31: warnings.warn("Preferred date format is 'yyyy-mm-dd' or 'yyyy/mm/dd'. Will attempt to parse date.") date = datetime.datetime(int(dateSplit[2]), int(dateSplit[0]), int(dateSplit[1])) doy = date.timetuple().tm_yday year = date.year else: warnings.warn("Preferred date format is 'yyyy-mm-dd' or 'yyyy/mm/dd'. Cannot parse date.") elif type(date) is int: doy = date year = datetime.datetime.today().year else: date = datetime.datetime.now() doy = date.timetuple().tm_yday year = date.year warnings.warn("Did not recognize date, using year {} and day {}".format(year, doy)) # Select which instrument we are reading from (requires different processes for each instrument) # Get any kwargs that are included in obspy.read obspyReadKwargs = {} for argName in inspect.getfullargspec(obspy.read)[0]: if argName in kwargs.keys(): obspyReadKwargs[argName] = kwargs[argName] # Select how reading will be done if isinstance(params['input_data'], obspy.Stream): rawDataIN = params['input_data'].copy() tr = params['input_data'][0] params['input_data'] = '_'.join([tr.id, str(tr.stats.starttime)[:10], str(tr.stats.starttime)[11:19], str(tr.stats.endtime)[11:19]]) elif isinstance(params['input_data'], obspy.Trace): rawDataIN = obspy.Stream(params['input_data']) tr = params['input_data'] params['input_data'] = '_'.join([tr.id, str(tr.stats.starttime)[:10], str(tr.stats.starttime)[11:19], str(tr.stats.endtime)[11:19]]) elif isinstance(params['input_data'], HVSRData): rawDataIN = params['input_data']['stream'] else: if source == 'raw': try: if inst.lower() in trominoNameList: params['instrument'] = 'Tromino' params['params']['instrument'] = 'Tromino' trominoKwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())} paramDict = {k: v for k, v in params.items()} trominoKwargs.update(paramDict) rawDataIN = read_tromino_files(params, verbose=verbose, **trominoKwargs) if 'site' in rawDataIN[0].stats: if hasattr(params, 'site'): params['site'] = rawDataIN[0].stats.site if hasattr(params, input_params): params['input_params']['site'] = rawDataIN[0].stats.site else: if inst.lower() not in raspShakeInstNameList: print(f"Unrecognized value instrument={inst}. Defaulting to raw raspberry shake data.") rawDataIN = __read_RS_file_struct(dPath, source, year, doy, inv, params, verbose=verbose) except Exception as e: raise RuntimeError(f"Data not fetched for {params['site']}. Check input parameters or the data file.\n\n{e}") elif source == 'stream' or isinstance(params, (obspy.Stream, obspy.Trace)): rawDataIN = params['input_data'].copy() elif source == 'dir': if inst.lower() in raspShakeInstNameList: rawDataIN = __read_RS_file_struct(dPath, source, year, doy, inv, params, verbose=verbose) else: obspyFiles = {} for obForm in OBSPY_FORMATS: temp_file_glob = pathlib.Path(dPath.as_posix().lower()).glob('.'+obForm.lower()) for f in temp_file_glob: currParams = params currParams['input_data'] = f curr_data = fetch_data(params, source='file', #all the same as input, except just reading the one file using the source='file' data_export_path=data_export_path, data_export_format=data_export_format, detrend=detrend, detrend_options=detrend_options, update_metadata=update_metadata, verbose=verbose, **kwargs) curr_data.merge() obspyFiles[f.stem] = curr_data #Add path object to dict, with filepath's stem as the site name return HVSRBatch(obspyFiles) elif source == 'file' and str(params['input_data']).lower() not in SAMPLE_LIST: # Read the file specified by input_data # Automatically read tromino data if str(inst).lower() in trominoNameList or 'tromino' in str(inst).lower() or 'trc' in dPath.suffix: params['instrument'] = 'Tromino' params['params']['instrument'] = 'Tromino' if 'blu' in str(inst).lower(): params['instrument'] = 'Tromino Blue' params['params']['instrument'] = 'Tromino Blue' try: trominoKwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())} paramDict = {k: v for k, v in params.items()} if 'input_data' in trominoKwargs: del trominoKwargs['input_data'] if 'tromino_model' not in trominoKwargs: trominoKwargs['tromino_model'] = params['instrument'] rawDataIN = read_tromino_files(input_data=params, verbose=verbose, **trominoKwargs) if 'site' in rawDataIN[0].stats and params['site'] == 'HVSRSite': if hasattr(params, 'site'): params['site'] = rawDataIN[0].stats.site if hasattr(params, 'params'): params['params']['site'] = rawDataIN[0].stats.site params['acq_date'] = rawDataIN[0].stats.starttime.date params['starttime'] = rawDataIN[0].stats.starttime params['endtime'] = rawDataIN[0].stats.endtime except Exception: try: rawDataIN = obspy.read(dPath) except Exception: raise ValueError(f"{dPath.suffix} is not a a filetype that can be read by SpRIT (via ObsPy)") else: if isinstance(dPath, list) or isinstance(dPath, tuple): rawStreams = [] for datafile in dPath: rawStream = obspy.read(datafile, **obspyReadKwargs) rawStreams.append(rawStream) #These are actually streams, not traces for i, stream in enumerate(rawStreams): if i == 0: rawDataIN = obspy.Stream(stream) #Just in case else: rawDataIN = rawDataIN + stream #This adds a stream/trace to the current stream object elif str(dPath)[:6].lower() == 'sample': pass else: rawDataIN = obspy.read(dPath, **obspyReadKwargs)#, starttime=obspy.core.UTCDateTime(params['starttime']), endttime=obspy.core.UTCDateTime(params['endtime']), nearest_sample =True) elif source == 'batch' and str(params['input_data']).lower() not in SAMPLE_LIST: if verbose: print('\nFetching data (fetch_data())') batch_data_read_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(batch_data_read).parameters.keys())} params = batch_data_read(batch_data=params['input_data'], verbose=verbose, **batch_data_read_kwargs) params = HVSRBatch(params, df_as_read=params.input_df) return params elif str(params['input_data']).lower() in SAMPLE_LIST or f"sample{params['input_data'].lower()}" in SAMPLE_LIST: if source=='batch': params['input_data'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv') params = batch_data_read(batch_data=params['input_data'], batch_type='sample', verbose=verbose) params = HVSRBatch(params, df_as_read=params.input_df) return params elif source=='dir': params['input_data'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv') params = batch_data_read(batch_data=params['input_data'], batch_type='sample', verbose=verbose) params = HVSRBatch(params, df_as_read=params.input_df) return params elif source=='file': params['input_data'] = str(params['input_data']).lower() if params['input_data'].lower() in sampleFileKeyMap.keys(): if params['input_data'].lower() == 'sample': params['input_data'] = sampleFileKeyMap params['input_data'] = sampleFileKeyMap[params['input_data'].lower()] else: params['input_data'] = SAMPLE_DATA_DIR.joinpath('SampleHVSRSite1_AM.RAC84.00.2023.046_2023-02-15_1704-1734.MSEED') dPath = params['input_data'] rawDataIN = obspy.read(dPath)#, starttime=obspy.core.UTCDateTime(params['starttime']), endttime=obspy.core.UTCDateTime(params['endtime']), nearest_sample =True) #import warnings #with warnings.catch_warnings(): # warnings.simplefilter(action='ignore', category=UserWarning) # rawDataIN.attach_response(inv) else: # Last try if source cannot be read correctly try: rawDataIN = obspy.read(dPath) except: RuntimeError(f'source={source} not recognized, and input_data cannot be read using obspy.read()') if verbose: print('\t Data as read in initially:') print(f'\t {len(rawDataIN)} trace(s) in Stream:') for i, trace in enumerate(rawDataIN): if i == 0: prevComponent = trace.stats.component print(f'\t\t{prevComponent} Component') currComponent = trace.stats.component if prevComponent != currComponent: print(f"\t\t{currComponent} Component") print("\t\t ", trace) prevComponent = trace.stats.component print() # Get metadata from the data itself, if not reading raw data try: # If the data already exists (not reading in raw from RS, for example), get the parameters from the data dataIN = rawDataIN.copy() if source != 'raw': # Use metadata from file for updating: # site site_default = inspect.signature(input_params).parameters['site'].default updateMsg = [] if params['site'] == site_default and params['site'] != dPath.stem: if isinstance(dPath, (list, tuple)): dPath = dPath[0] params['site'] = dPath.stem params['params']['site'] = dPath.stem if verbose: updateMsg.append(f"\tSite name updated to {params['site']}") # network net_default = inspect.signature(input_params).parameters['network'].default if params['net'] == net_default and net_default != dataIN[0].stats.network: params['net'] = dataIN[0].stats.network params['params']['net'] = dataIN[0].stats.network if verbose: updateMsg.append(f"\tNetwork name updated to {params['net']}") # station sta_default = inspect.signature(input_params).parameters['station'].default if str(params['sta']) == sta_default and str(params['sta']) != dataIN[0].stats.station: params['sta'] = dataIN[0].stats.station params['station'] = dataIN[0].stats.station params['params']['sta'] = dataIN[0].stats.station params['params']['station'] = dataIN[0].stats.station if verbose: updateMsg.append(f"\tStation name updated to {params['sta']}") # location loc_default = inspect.signature(input_params).parameters['location'].default if params['location'] == loc_default and params['location'] != dataIN[0].stats.location: params['location'] = dataIN[0].stats.location params['params']['location'] = dataIN[0].stats.location if verbose: updateMsg.append(f"\tLocation updated to {params['location']}") # channels channelList = [] cha_default = inspect.signature(input_params).parameters['channels'].default if str(params['cha']) == cha_default: for tr in dataIN: if tr.stats.channel not in channelList: channelList.append(tr.stats.channel) channelList.sort(reverse=True) #Just so z is first, just in case if set(params['cha']) != set(channelList): params['cha'] = channelList params['params']['cha'] = channelList if verbose: updateMsg.append(f"\tChannels updated to {params['cha']}") # Acquisition date # acqdate_default = inspect.signature(input_params).parameters['acq_date'].default acqdate_default = str(NOWTIME.date()) # If input date is default date and does not match date in the data, update to match data if str(params['acq_date']) == acqdate_default and params['acq_date'] != dataIN[0].stats.starttime.date: params['acq_date'] = params['params']['acq_date'] = dataIN[0].stats.starttime.date if verbose: updateMsg.append(f"\tAcquisition Date updated to {params['acq_date']}") elif params['acq_date'] != dataIN[0].stats.starttime.date: # If date has been input manually and does not match data date, update the data newStartDate = sprit_utils._format_time(params['acq_date']) params['acq_date'] = params['params']['acq_date'] = newStartDate.date() for tr in dataIN.merge(): tr.stats.starttime = obspy.UTCDateTime(newStartDate.year, newStartDate.month, newStartDate.day, tr.stats.starttime.hour, tr.stats.starttime.minute, tr.stats.starttime.second, tr.stats.starttime.microsecond) # starttime today_Starttime = obspy.UTCDateTime(datetime.datetime(year=datetime.date.today().year, month=datetime.date.today().month, day=datetime.date.today().day, hour=0, minute=0, second=0, microsecond=0)) maxStarttime = datetime.datetime(year=params['acq_date'].year, month=params['acq_date'].month, day=params['acq_date'].day, hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc) stime_default = obspy.UTCDateTime(NOWTIME) sTimeIsDefault = params['starttime'] == stime_default # Check if stime is not the same as the data starttime (if it is, leave it alone!) if params['starttime'] != dataIN.merge()[0].stats.starttime: # Check if stime in params is the default value if sTimeIsDefault: # We will update the params starttime to match the data if it is the default input # Ensure we are getting the largest starttime from the data traces (assumes they all start at the same time, but may be slightly off) for tr in dataIN.merge(): currTime = datetime.datetime(year=tr.stats.starttime.year, month=tr.stats.starttime.month, day=tr.stats.starttime.day, hour=tr.stats.starttime.hour, minute=tr.stats.starttime.minute, second=tr.stats.starttime.second, microsecond=tr.stats.starttime.microsecond, tzinfo=datetime.timezone.utc) if currTime > maxStarttime: maxStarttime = currTime # Calculate new start time based data dataDate = dataIN.merge()[0].stats.starttime.date newStarttime = obspy.UTCDateTime(year=dataDate.year, month=dataDate.month,day=dataDate.day, hour=maxStarttime.hour, minute=maxStarttime.minute, second=maxStarttime.second, microsecond=maxStarttime.microsecond) # Update parameters to match new starttime (this will be trimmed later if maxStarttime is different than trace starttimes) params['starttime'] = newStarttime params['params']['starttime'] = newStarttime if verbose: updateMsg.append(f"\tStarttime updated to {params['starttime']}") else: # If we manually set a starttime in order to trim or otherwise update the data # For trimming data (starttime within data time bounds) sTimeInDataTime = params['starttime'] > dataIN.merge()[0].stats.starttime and params['starttime'] < dataIN.merge()[-1].stats.endtime if sTimeInDataTime: # Don't update anything, will use for trimming later pass if verbose: updateMsg.append(f"\tStart of data will be trimmed to {params['starttime']}") else: # If params['starttime'] is not in data time bounds, assume that the dataset timing should be updated minStartTime = dataIN.merge()[-1].stats.starttime for tr in dataIN.merge(): # Calculate offset from current trace to starttime (in case traces aren't fully aligned) if tr.stats.starttime < minStartTime: minStartTime = tr.stats.starttime # Calculate the offset between the earliest trace starttime and specified starttime timeOffset = minStartTime - params['starttime'] # Update the startime for each trace based on offset for tr in dataIN.merge(): tr.stats.starttime = tr.stats.starttime - timeOffset if verbose: updateMsg.append(f"\tStarttime updated to {params['starttime']}") # endttime # Endtime only matters if it is used to trim the data eTimeDefault = obspy.UTCDateTime(NOWTIME.year, NOWTIME.month, NOWTIME.day, 23, 59, 59, 999999) eTimeIsDefault = params['endtime'] == eTimeDefault minEndTime = dataIN.merge()[-1].stats.endtime for i, tr in enumerate(dataIN.merge()): if tr.stats.endtime < minEndTime: minEndTime = tr.stats.endtime # Check if etime is anything other than default if not eTimeIsDefault: # If endtime is not default, change params['endtime'] to match data endtime unless it falls within the data time (in which case, will be used to trim later) eTimeInDataTimeBounds = (params['endtime'] > dataIN.merge()[0].stats.starttime) and (params['endtime'] < minEndTime) if not eTimeInDataTimeBounds: params['endtime'] = params['params']['endtime'] = minEndTime else: params['endtime'] = params['params']['endtime'] = minEndTime # HVSR_ID (derived) project = params['project'] if project is None: proj_id = '' else: proj_id = str(project)+'-' # Update HVSR_ID with new information params['hvsr_id'] = f"{proj_id}{params['acq_date'].strftime('%Y%m%d')}-{params['starttime'].strftime('%H%M')}-{params['station']}" params['params']['hvsr_id'] = f"{proj_id}{params['acq_date'].strftime('%Y%m%d')}-{params['starttime'].strftime('%H%M')}-{params['station']}" if verbose and len(updateMsg) > 0: updateMsg.insert(0, 'The following parameters have been updated directly from the data:') for msgLine in updateMsg: print('\t', msgLine) print() # Clean up dataIN = dataIN.split() dataIN = dataIN.trim(starttime=params['starttime'], endtime=params['endtime']) dataIN.merge() except Exception as e: raise RuntimeError(f'Data as read by obspy does not contain the proper metadata. \n{e}.\nCheck your input parameters or the data file.') # Latitude, Longitude, Elevation # Maybe make this more comprehensive, like for all input_params if hasattr(dataIN[0].stats, 'latitude'): params['latitude'] = params['params']['latitude'] = dataIN[0].stats['latitude'] if hasattr(dataIN[0].stats, 'longitude'): params['longitude'] = params['params']['longitude'] = dataIN[0].stats['longitude'] if hasattr(dataIN[0].stats, 'elevation'): params['elevation'] = params['params']['elevation'] = dataIN[0].stats['elevation'] if hasattr(dataIN[0].stats, 'elev_unit'): params['elev_unit'] = params['params']['elev_unit'] = dataIN[0].stats['elev_unit'] if hasattr(dataIN[0].stats, 'input_crs'): params['input_crs'] = params['params']['input_crs'] = dataIN[0].stats['input_crs'] # Get and update metadata after updating data from source params = get_metadata(params, update_metadata=update_metadata, source=source) inv = params['inv'] # Trim and save data as specified if data_export_path == 'None': data_export_path = None if not data_export_path: pass else: if isinstance(params, HVSRBatch): pass else: dataIN = _trim_data(input=params, stream=dataIN, export_dir=data_export_path, source=source, data_export_format=data_export_format) # Split data if masked array (if there are gaps)...detrending cannot be done without for tr in dataIN: if isinstance(tr.data, np.ma.masked_array): dataIN = dataIN.split() #Splits entire stream if any trace is masked_array break # Detrend data if isinstance(params, HVSRBatch): pass else: dataIN = __detrend_data(input=dataIN, detrend=detrend, detrend_options=detrend_options, verbose=verbose, source=source) # Filter data if isinstance(params, HVSRBatch): pass elif filter_type is None: pass else: dataIN.filter_type(type=filter_type, **filter_options) # Remerge data dataIN = dataIN.merge(method=1) # Plot the input stream? if plot_input_stream: if plot_engine.lower() in ['plotly', 'plty', 'p']: if 'spectrogram_component' in kwargs.keys(): specComp = kwargs['spectrogram_component'] else: specComp = 'Z' params['Input_Plot'] = sprit_plot.plot_input_stream(hv_data=params, stream=dataIN, spectrogram_component=specComp, show_plot=show_plot, return_fig=True) elif plot_engine.lower() in ['obspy', 'ospby', 'osbpy', 'opsby', 'opspy', 'o']: params['Input_Plot'] = dataIN.plot(method='full', linewidth=0.25, handle=True, show=False) if show_plot: plt.show() else: plt.close() else: try: params['Input_Plot'] = sprit_plot._plot_input_stream_mpl(stream=dataIN, hv_data=params, component='Z', stack_type='linear', detrend='mean', dbscale=True, fill_gaps=None, ylimstd=3, return_fig=True, fig=None, ax=None, show_plot=False) if show_plot: plt.show() else: plt.close() except Exception as e: print(f'Error with default plotting method: {e}.\n Falling back to internal obspy plotting method') params['Input_Plot'] = dataIN.plot(method='full', linewidth=0.25, handle=True, show=False) if show_plot: plt.show() else: plt.close() else: params['Input_Plot'] = None # Sort channels (make sure Z is first, makes things easier later) if isinstance(params, HVSRBatch): pass else: dataIN = _sort_channels(input=dataIN, source=source, verbose=verbose) # Clean up the ends of the data unless explicitly specified to do otherwise (this is a kwarg, not a parameter) if 'clean_ends' not in kwargs.keys(): clean_ends = True else: clean_ends = kwargs['clean_ends'] if clean_ends: maxStarttime = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=36500) # 100 years ago minEndtime = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=36500) # 100 years from now for tr in dataIN: currStarttime = datetime.datetime(year=tr.stats.starttime.year, month=tr.stats.starttime.month, day=tr.stats.starttime.day, hour=tr.stats.starttime.hour, minute=tr.stats.starttime.minute, second=tr.stats.starttime.second, microsecond=tr.stats.starttime.microsecond, tzinfo=datetime.timezone.utc) if currStarttime > maxStarttime: maxStarttime = currStarttime currEndtime = datetime.datetime(year=tr.stats.endtime.year, month=tr.stats.endtime.month, day=tr.stats.endtime.day, hour=tr.stats.endtime.hour, minute=tr.stats.endtime.minute, second=tr.stats.endtime.second, microsecond=tr.stats.endtime.microsecond, tzinfo=datetime.timezone.utc) if currEndtime < minEndtime: minEndtime = currEndtime maxStarttime = obspy.UTCDateTime(maxStarttime) minEndtime = obspy.UTCDateTime(minEndtime) dataIN = dataIN.split() for tr in dataIN: tr.trim(starttime=maxStarttime, endtime=minEndtime) pass dataIN.merge() params['batch'] = False # Set False by default, will get corrected later if batch params['input_stream'] = dataIN.copy() # Original stream as read params['stream'] = dataIN.copy() # Stream that may be modified later if 'processing_parameters' not in params.keys(): params['processing_parameters'] = {} params['processing_parameters']['fetch_data'] = {} exclude_params_list = ['params'] for key, value in orig_args.items(): if key not in exclude_params_list: params['processing_parameters']['fetch_data'][key] = value # Attach response data to stream and get paz (for PPSD later) # Check if response can be attached try: responseMatch = {} for trace in params['stream']: k = trace.stats.component # Check if station, channel, location, and timing match responseMatch[k] = False # Default to false until proven otherwise for sta in params['inv'].networks[0].stations: # Assumes only one network per inst hasCha = False # all default to false until proven otherwise hasLoc = False hasSta = False isStarted = False notEnded = False # Check station if sta.code == params['stream'][0].stats.station: hasSta = True else: continue # Check Channel for cha in sta: if cha.code == trace.stats.channel: hasCha = True # Check location if cha.location_code == trace.stats.location: hasLoc = True # Check time if (cha.start_date is None or cha.start_date <= tr.stats.starttime): isStarted = True if (cha.end_date is None or cha.end_date >= tr.stats.endtime): notEnded = True if all([hasSta, hasCha, hasLoc, isStarted, notEnded]): responseMatch[k] = True if responseMatch[k] is not True: responseMatch[k] = {'Station': (hasSta, [sta.code for sta in params['inv'].networks[0].stations]), 'Channel': (hasCha, [cha.code for cha in sta for sta in params['inv'].networks[0].stations]), 'Location': (hasLoc, [cha.location_code for cha in sta for sta in params['inv'].networks[0].stations]), 'Starttime':(isStarted, [cha.start_date for cha in sta for sta in params['inv'].networks[0].stations]), 'Endtime': (notEnded, [cha.end_date for cha in sta for sta in params['inv'].networks[0].stations])} metadataMatchError = False for comp, matchItems in responseMatch.items(): if matchItems is not True: metadataMatchError = True errorMsg = 'The following items in your data need to be matched in the instrument response/metadata:' for matchType, match in matchItems.items(): if match[0] is False: errorMsg = errorMsg + f"\n\t{matchType} does not match {match[1]} correctly for component {comp}: {params['stream'].select(component=comp)[0].stats[matchType.lower()]}" if metadataMatchError: if verbose: print(errorMsg) raise ValueError('Instrument Response/Metadata does not match input data and cannot be used!!\n'+errorMsg) else: params['stream'].attach_response(params['inv']) for tr in params['stream']: cmpnt = tr.stats.component params['paz'][cmpnt]['poles'] = tr.stats.response.get_paz().poles params['paz'][cmpnt]['zeros'] = tr.stats.response.get_paz().zeros params['paz'][cmpnt]['sensitivity'] = tr.stats.response.get_paz().stage_gain params['paz'][cmpnt]['gain'] = tr.stats.response.get_paz().normalization_factor except Exception as e: if 'obspy_ppsds' in kwargs and kwargs['obspy_ppsds']: errMsg = "Metadata missing, incomplete, or incorrect. Instrument response cannot be removed." errMsg += "if metadata cannot be matched, use obspy_ppsds=False to perform analysis on raw data (without instrument response removed)" raise ValueError(errMsg) else: if verbose: print("\tMetadata/instrument response does not match data.") print("\t Raw data (without the instrument response removed) will be used for processing.") params['processing_status']['fetch_data_status'] = True if verbose and not isinstance(params, HVSRBatch): print('\n') dataINStr = dataIN.__str__().split('\n') for line in dataINStr: print('\t\t', line) params = sprit_utils._check_processing_status(params, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return paramsFetch ambient seismic data from a source to read into obspy stream.
Parameters
- params : dict
- Dictionary containing all the necessary params to get data.
- Parameters defined using input_params() function.
- source : str,
- String indicating where/how data file was created. For example, if raw data, will need to find correct channels.
- 'raw' finds raspberry shake data, from raw output copied using scp directly from Raspberry Shake, either in folder or subfolders;
- 'dir' is used if the day's 3 component files (currently Raspberry Shake supported only) are all 3 contained in a directory by themselves.
- 'file' is used if the params['input_data'] specified in input_params() is the direct filepath to a single file to be read directly into an obspy stream.
- 'batch' is used to read a list or specified set of seismic files.
- Most commonly, a csv file can be read in with all the parameters. Each row in the csv is a separate file. Columns can be arranged by parameter.
data_export_path:Noneorstrorpathlib obj, default=None- If None (or False), data is not trimmed in this function. Otherwise, this is the directory to save trimmed and exported data.
data_export_format:str='mseed'- If data_export_path is not None, this is the format in which to save the data
detrend:strorbool, default='spline'- If False, data is not detrended. Otherwise, this should be a string accepted by the type parameter of the obspy.core.trace.Trace.detrend method: https://docs.obspy.org/packages/autogen/obspy.core.trace.Trace.detrend.html
detrend_options:int, default=2- If detrend parameter is 'spline' or 'polynomial', this is passed directly to the order parameter of obspy.core.trace.Trace.detrend method.
filter_type:None, str- Type of filter to use on raw data.
This should either be None or any of {'bandpass', 'bandstop', 'lowpass', 'highpass', 'lowpass_cheby_2', 'lowpass_fir', 'remez_fir'}.
This passes
filter_typeto thetypeparameter and**filter_optionsto the**optionsparameter of the obspy.Stream filter() method. See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html If None, no filtering is done on the input seismic data. filter_options:dict- Dictionary that will be unpacked into the
**optionsparameter of the filter() method of the obspy.Stream class. This should fit the parameters of whichever filter type is specifed by filter_type. Example options for the 'bandpass' filter_type might be:filter_options={'freqmin': 0.1, 'freqmax':50, 'df':100, 'corners':4, 'zerophase':True}. See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html update_metadata:bool, default=True- Whether to update the metadata file, used primarily with Raspberry Shake data which uses a generic inventory file.
plot_input_stream:bool, default=False- Whether to plot the raw input stream. This plot includes a spectrogram (Z component) and the raw (with decimation for speed) plots of each component signal.
plot_engine:str, default='matplotlib'- Which plotting library/engine to use for plotting the Input stream. Options are 'matplotlib', 'plotly', or 'obspy' (not case sensitive).
verbose:bool, default=False- Whether to print outputs and inputs to the terminal
**kwargs- Keywords arguments, primarily for 'batch' and 'dir' sources
Returns
def generate_ppsds(hvsr_data, **gen_psds_kwargs)-
Expand source code
def generate_ppsds(hvsr_data, **gen_psds_kwargs): """This function is to maintain backwards compatibility with previous version See Also -------- generate_psds """ warnings.warn("generate_ppsds() is now deprecated, use generate_psds()", DeprecationWarning) hvsrData = generate_psds(hvsr_data, **gen_psds_kwargs) return hvsrData def generate_psds(hvsr_data,
window_length=30.0,
overlap_pct=0.5,
window_type='hann',
window_length_method='length',
remove_response=False,
skip_on_gaps=True,
num_freq_bins=512,
hvsr_band=[0.5, 40],
obspy_ppsds=False,
azimuthal_psds=False,
verbose=False,
plot_psds=False,
**obspy_ppsd_kwargs)-
Expand source code
def generate_psds(hvsr_data, window_length=30.0, overlap_pct=0.5, window_type='hann', window_length_method='length', remove_response=False, skip_on_gaps=True, num_freq_bins=512, hvsr_band=DEFAULT_BAND, obspy_ppsds=False, azimuthal_psds=False, verbose=False, plot_psds=False, **obspy_ppsd_kwargs): """Calculate Power Spectral Density (PSD) curves for each channel. Uses the [scipy.signal.welch()](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html) function to generate PSDs by default, or can use Obspy's PPSD class. Info on Obspy PPSD creation here (if obspy_ppsds=True): https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html Parameters ---------- hvsr_data : dict, HVSRData object, or HVSRBatch object Data object containing all the parameters and other data of interest (stream and paz, for example) window_length : float Length of the window, in seconds, to use for each PSD calculation. Defaults to 30.0. overlap_pct : float Percentage (should be 0-1) for overlapping each window used for PSD calculation. Defaults to 0.5. window_type : str Type of window to use. This is passed to the window parameter of the scipy.signal.welch function window_length_method : str = {'length', 'number'} Whether the window length should be a measure of length in seconds or number of windows. If number of windows uses integer value. remove_response : bool, default=False Whether to remove the instrument response from the data traces before calculating PSD data. If True, the appropriate metadata (i.e., obspy.Inventory object) must be attached to the stream and should be stored in the 'inv' attribute of hvsr_data. skip_on_gaps : bool, default=True Whether to skip data gaps when processing windows. This is passed to the skip_on_gaps parameter of the Obspy PPSD class. num_freq_bins : int, default=512 Number of frequency bins to use. When using the default (i.e., scipy.signal.welch) PSD function, the frequency bins are created manually for processing. obspy_ppsds : bool, default=False Whether to use the Obspy PPSD class. azimuthal_psds : bool, default=False Whether to generate PPSDs for azimuthal data verbose : bool, default=True Whether to print inputs and results to terminal plot_psds : bool, default=False Whether to show a plot of the psds here. **obspy_ppsd_kwargs : dict Dictionary with keyword arguments that are passed directly to obspy.signal.PPSD. If the following keywords are not specified, their defaults are amended in this function from the obspy defaults for its PPSD function. Specifically: - ppsd_length defaults to 30 (seconds) here instead of 3600 - skip_on_gaps defaults to True instead of False - period_step_octaves defaults to 0.03125 instead of 0.125 Returns ------- psds : HVSRData object Dictionary containing entries with psds for each channel See Also -------- [scipy.signal.welch](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html) [obspy.signal.spectral_estimation.PPSD](https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html) """ # First, divide up for batch or not orig_args = locals().copy() # Get the initial arguments start_time = datetime.datetime.now() obspy_ppsd_kwargs_sprit_defaults = obspy_ppsd_kwargs.copy() # Set defaults here that are different than obspy defaults if 'ppsd_length' not in obspy_ppsd_kwargs.keys(): obspy_ppsd_kwargs_sprit_defaults['ppsd_length'] = 30.0 if 'period_step_octaves' not in obspy_ppsd_kwargs.keys(): obspy_ppsd_kwargs_sprit_defaults['period_step_octaves'] = 0.03125 if 'period_limits' not in obspy_ppsd_kwargs.keys(): if 'hvsr_band' in hvsr_data.keys(): obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_data['hvsr_band'][1], 1/hvsr_data['hvsr_band'][0]] elif 'input_params' in hvsr_data.keys() and 'hvsr_band' in hvsr_data['input_params'].keys(): obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_data['input_params']['hvsr_band'][1], 1/hvsr_data['input_params']['hvsr_band'][0]] else: obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_band[1], 1/hvsr_band[0]] else: if verbose: print(f"\t\tUpdating hvsr_band to band specified by period_limits={obspy_ppsd_kwargs['period_limits']}") if 'hvsr_band' in hvsr_data.keys(): if obspy_ppsd_kwargs['period_limits'] is None: obspy_ppsd_kwargs['period_limits'] = np.round([1/hvsr_data['hvsr_band'][1], 1/hvsr_data['hvsr_band'][0]], 3).tolist() else: hvsr_data['hvsr_band'] = np.round([1/obspy_ppsd_kwargs['period_limits'][1], 1/obspy_ppsd_kwargs['period_limits'][0]], 2).tolist() if 'input_params' in hvsr_data.keys() and 'hvsr_band' in hvsr_data['input_params'].keys(): hvsr_data['input_params']['hvsr_band'] = np.round([1/obspy_ppsd_kwargs['period_limits'][1], 1/obspy_ppsd_kwargs['period_limits'][0]], 2).tolist() # Get Probablistic power spectral densities (PPSDs) # Get default args for function obspy_ppsd_kwargs = sprit_utils._get_default_args(PPSD) obspy_ppsd_kwargs.update(obspy_ppsd_kwargs_sprit_defaults) # Update with sprit defaults, or user input orig_args['obspy_ppsd_kwargs'] = obspy_ppsd_kwargs # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'generate_psds' in hvsr_data['processing_parameters'].keys(): defaultVDict = dict(zip(inspect.getfullargspec(generate_psds).args[1:], inspect.getfullargspec(generate_psds).defaults)) defaultVDict['obspy_ppsd_kwargs'] = obspy_ppsd_kwargs update_msg = [] for k, v in hvsr_data['processing_parameters']['generate_psds'].items(): # Manual input to function overrides the imported parameter values if not isinstance(v, (HVSRData, HVSRBatch)) and (k in orig_args.keys()) and (orig_args[k] == defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v azimuthal_psds = orig_args['azimuthal_psds'] verbose = orig_args['verbose'] obspy_ppsd_kwargs = orig_args['obspy_ppsd_kwargs'] # if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']): if verbose: print('\nGenerating Probabilistic Power Spectral Densities (generate_psds())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key == 'hvsr_data': pass else: print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in hvsr_data.keys() and 'generate_psds' in hvsr_data['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() if isinstance(hvsr_data, HVSRBatch): # If running batch, we'll loop through each one for site_name in hvsr_data.keys(): args = orig_args.copy() # Make a copy so we don't accidentally overwrite individual_params = hvsr_data[site_name] # Get what would normally be the "hvsr_data" variable for each site args['hvsr_data'] = individual_params # reset the hvsr_data parameter we originally read in to an individual site hvsr_data if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_data[site_name] = __generate_ppsds_batch(**args) #Call another function, that lets us run this function again except: hvsr_data[site_name]['processing_status']['generate_psds_status']=False hvsr_data[site_name]['processing_status']['overall_status'] = False else: hvsr_data[site_name]['processing_status']['generate_psds_status']=False hvsr_data[site_name]['processing_status']['overall_status'] = False try: sprit_tkinter_ui.update_progress_bars(prog_percent=5) except Exception as e: pass #print(e) return hvsr_data def _get_obspy_ppsds(hvsr_data,**obspy_ppsd_kwargs): paz = hvsr_data['paz'] stream = hvsr_data['stream'] # Get ppsds of e component eStream = stream.select(component='E') estats = eStream.traces[0].stats ppsdE = PPSD(estats, paz['E'], **obspy_ppsd_kwargs) ppsdE.add(eStream) # Get ppsds of n component nStream = stream.select(component='N') nstats = nStream.traces[0].stats ppsdN = PPSD(nstats, paz['N'], **obspy_ppsd_kwargs) ppsdN.add(nStream) # Get ppsds of z component zStream = stream.select(component='Z') zstats = zStream.traces[0].stats ppsdZ = PPSD(zstats, paz['Z'], **obspy_ppsd_kwargs) ppsdZ.add(zStream) # Get ppsds of R components (azimuthal data) has_az = False ppsds = {'Z':ppsdZ, 'E':ppsdE, 'N':ppsdN} rStream = stream.select(component='R') for curr_trace in stream: if 'R' in curr_trace.stats.channel: curr_stats = curr_trace.stats ppsd_curr = PPSD(curr_stats, paz['E'], **obspy_ppsd_kwargs) has_az = True ppsdName = curr_trace.stats.location ppsd_curr.add(rStream) ppsds[ppsdName] = ppsd_curr # Add to the input dictionary, so that some items can be manipulated later on, and original can be saved hvsr_data['ppsds_obspy'] = ppsds hvsr_data['psds'] = {} anyKey = list(hvsr_data['ppsds_obspy'].keys())[0] # Get ppsd class members members = [mems for mems in dir(hvsr_data['ppsds_obspy'][anyKey]) if not callable(mems) and not mems.startswith("_")] for k in ppsds.keys(): hvsr_data['psds'][k] = {} #Get lists/arrays so we can manipulate data later and copy everything over to main 'psds' subdictionary (convert lists to np.arrays for consistency) listList = ['times_data', 'times_gaps', 'times_processed','current_times_used', 'psd_values'] #Things that need to be converted to np.array first, for consistency timeKeys= ['times_processed','current_times_used','psd_values'] timeDiffWarn = True dfList = [] time_data = {} time_dict = {} for m in members: for k in hvsr_data['psds'].keys(): hvsr_data['psds'][k][m] = getattr(hvsr_data['ppsds_obspy'][k], m) if m in listList: hvsr_data['psds'][k][m] = np.array(hvsr_data['psds'][k][m]) if str(m)=='times_processed': unique_times = np.unique(np.array([hvsr_data['psds']['Z'][m], hvsr_data['psds']['E'][m], hvsr_data['psds']['N'][m]])) common_times = [] for currTime in unique_times: if currTime in hvsr_data['psds']['Z'][m]: if currTime in hvsr_data['psds']['E'][m]: if currTime in hvsr_data['psds']['N'][m]: common_times.append(currTime) cTimeIndList = [] for cTime in common_times: ZArr = hvsr_data['psds']['Z'][m] EArr = hvsr_data['psds']['E'][m] NArr = hvsr_data['psds']['N'][m] cTimeIndList.append([int(np.where(ZArr == cTime)[0][0]), int(np.where(EArr == cTime)[0][0]), int(np.where(NArr == cTime)[0][0])]) # Make sure number of time windows is the same between PPSDs (this can happen with just a few slightly different number of samples) if m in timeKeys: if str(m) != 'times_processed': time_data[str(m)] = (hvsr_data['psds']['Z'][m], hvsr_data['psds']['E'][m], hvsr_data['psds']['N'][m]) tSteps_same = hvsr_data['psds']['Z'][m].shape[0] == hvsr_data['psds']['E'][m].shape[0] == hvsr_data['psds']['N'][m].shape[0] if not tSteps_same: shortestTimeLength = min(hvsr_data['psds']['Z'][m].shape[0], hvsr_data['psds']['E'][m].shape[0], hvsr_data['psds']['N'][m].shape[0]) maxPctDiff = 0 for comp in hvsr_data['psds'].keys(): currCompTimeLength = hvsr_data['psds'][comp][m].shape[0] timeLengthDiff = currCompTimeLength - shortestTimeLength percentageDiff = timeLengthDiff / currCompTimeLength if percentageDiff > maxPctDiff: maxPctDiff = percentageDiff for comp in hvsr_data['psds'].keys(): while hvsr_data['psds'][comp][m].shape[0] > shortestTimeLength: hvsr_data['psds'][comp][m] = hvsr_data['psds'][comp][m][:-1] if maxPctDiff > 0.05 and timeDiffWarn: warnings.warn(f"\t Number of ppsd time windows between different components is significantly different: {round(maxPctDiff*100,2)}% > 5%. Last windows will be trimmed.") elif verbose and timeDiffWarn: print(f"\t Number of ppsd time windows between different components is different by {round(maxPctDiff*100,2)}%. Last window(s) of components with larger number of ppsd windows will be trimmed.") timeDiffWarn = False #So we only do this warning once, even though there may be multiple arrays that need to be trimmed for i, currTStep in enumerate(cTimeIndList): colList = [] currTStepList = [] colList.append('Use') currTStepList.append(np.ones_like(common_times[i]).astype(bool)) for tk in time_data.keys(): if 'current_times_used' not in tk: for i, k in enumerate(hvsr_data['psds'].keys()): if k.lower() in ['z', 'e', 'n']: colList.append(str(tk)+'_'+k) currTStepList.append(time_data[tk][i][currTStep[i]]) dfList.append(currTStepList) return hvsr_data, dfList, colList, common_times if obspy_ppsds: hvsr_data, dfList, colList, common_times = _get_obspy_ppsds(hvsr_data, **obspy_ppsd_kwargs) else: psdDict, times_bool = __single_psd_from_raw_data(hvsr_data, window_length=window_length, window_length_method=window_length_method, window_type=window_type, num_freq_bins=num_freq_bins, overlap=overlap_pct, remove_response=remove_response, do_azimuths=azimuthal_psds, show_psd_plot=False) common_times = [ct[0] for ct in times_bool] use_times = [ut[1] for ut in times_bool] x_freqs = np.flip(np.logspace(np.log10(hvsr_data['hvsr_band'][0]), np.log10(hvsr_data['hvsr_band'][1]), num_freq_bins)) psdDictUpdate = {} hvsr_data['psds'] = {} for key, compdict in psdDict.items(): psdDictUpdate[key] = np.array([list(np.flip(arr)) for time, arr in compdict.items()]) hvsr_data['psds'][key] = {} #hvsr_data['psds'] = {'Z':{}, 'E':{}, 'N':{}} for key, item in psdDict.items(): currSt = hvsr_data.stream.select(component=key).merge() hvsr_data['psds'][key]['channel'] = currSt[0].stats.channel hvsr_data['psds'][key]['current_times_used'] = common_times hvsr_data['psds'][key]['delta'] = float(currSt[0].stats.delta) #hvsr_data['psds'][key]['get_mean'] = np.nanmean(item) #hvsr_data['psds'][key]['mean'] = np.nanmean(item) #hvsr_data['psds'][key]['get_mode'] = scipy.stats.mode(item) #hvsr_data['psds'][key]['mode'] = scipy.stats.mode(item) hvsr_data['psds'][key]['id'] = currSt[0].id hvsr_data['psds'][key]['len'] = int(window_length / hvsr_data['psds'][key]['delta']) hvsr_data['psds'][key]['location'] = currSt[0].stats.location hvsr_data['psds'][key]['metadata'] = [currSt[0].stats.response if hasattr(currSt[0].stats, 'response') else None][0] hvsr_data['psds'][key]['network'] = currSt[0].stats.network hvsr_data['psds'][key]['nfft'] = int(window_length / hvsr_data['psds'][key]['delta']) hvsr_data['psds'][key]['nlap'] = int(overlap_pct*window_length / hvsr_data['psds'][key]['delta']) hvsr_data['psds'][key]['overlap'] = overlap_pct hvsr_data['psds'][key]['period_bin_centers'] = [round(1/float(f + np.diff(x_freqs)[i]/2), 4) for i, f in enumerate(x_freqs[:-1])] hvsr_data['psds'][key]['period_bin_centers'].append(float(round(1/x_freqs[-1], 3))) hvsr_data['psds'][key]['period_bin_centers'] = np.array(hvsr_data['psds'][key]['period_bin_centers']) hvsr_data['psds'][key]['period_bin_left_edges'] = 1/x_freqs[:-1] hvsr_data['psds'][key]['period_bin_right_edges'] = 1/x_freqs[1:] hvsr_data['psds'][key]['period_xedges'] = 1/x_freqs hvsr_data['psds'][key]['ppsd_length'] = window_length hvsr_data['psds'][key]['psd_length'] = window_length hvsr_data['psds'][key]['psd_frequencies'] = x_freqs hvsr_data['psds'][key]['psd_periods'] = 1/x_freqs hvsr_data['psds'][key]['psd_values'] = psdDictUpdate[key] hvsr_data['psds'][key]['sampling_rate'] = currSt[0].stats.sampling_rate hvsr_data['psds'][key]['skip_on_gaps'] = skip_on_gaps hvsr_data['psds'][key]['station'] = currSt[0].stats.station hvsr_data['psds'][key]['step'] = window_length * (1-overlap_pct) hvsr_data['psds'][key]['times_data'] = common_times hvsr_data['psds'][key]['times_gaps'] = [[None, None]] hvsr_data['psds'][key]['times_processed'] = [[None, None]] hvsr_data['ppsds_obspy'] = {} dfList = [] for i, w in enumerate(common_times): ws = str(w) dfList.append([use_times[i], psdDictUpdate['Z'][i], psdDictUpdate['E'][i], psdDictUpdate['N'][i]]) colList = ["Use", "psd_values_Z", "psd_values_E", "psd_values_N"] # dfList: list of np.arrays, fitting the above column # common_times: times in common between all, should be length of 1 psd dimension above # hvsr_data['psds']['Z']['times_gaps']: list of two-item lists with UTCDatetimes for gaps # #Maybe not needed hvsr_data['psds']['Z']['current_times_used'] hvsrDF = pd.DataFrame(dfList, columns=colList) if verbose: print(f"\t\t{hvsrDF.shape[0]} processing windows generated and psd values stored in hvsr_windows_df with columns: {', '.join(hvsrDF.columns)}") hvsrDF['Use'] = hvsrDF['Use'].astype(bool) # Add azimuthal psds values for k in hvsr_data['psds'].keys(): if k.upper() not in ['Z', 'E', 'N']: hvsrDF['psd_values_'+k] = hvsr_data['psds'][k]['psd_values'].tolist() hvsrDF['TimesProcessed_Obspy'] = common_times hvsrDF['TimesProcessed_ObspyEnd'] = hvsrDF['TimesProcessed_Obspy'] + obspy_ppsd_kwargs['ppsd_length'] # colList.append('TimesProcessed_Obspy') # currTStepList.append(common_times[i]) # Add other times (for start times) # Create functions to be used in pandas .apply() for datetime conversions def convert_to_datetime(obspyUTCDateTime): return obspyUTCDateTime.datetime.replace(tzinfo=datetime.timezone.utc) def convert_to_mpl_dates(obspyUTCDateTime): return obspyUTCDateTime.matplotlib_date hvsrDF['TimesProcessed'] = hvsrDF['TimesProcessed_Obspy'].apply(convert_to_datetime) hvsrDF['TimesProcessed_End'] = hvsrDF['TimesProcessed'] + datetime.timedelta(days=0, seconds=obspy_ppsd_kwargs['ppsd_length']) hvsrDF['TimesProcessed_MPL'] = hvsrDF['TimesProcessed_Obspy'].apply(convert_to_mpl_dates) hvsrDF['TimesProcessed_MPLEnd'] = hvsrDF['TimesProcessed_MPL'] + (obspy_ppsd_kwargs['ppsd_length']/86400) # Take care of existing time gaps, in case not taken care of previously if obspy_ppsds: for gap in hvsr_data['psds']['Z']['times_gaps']: hvsrDF['Use'] = (hvsrDF['TimesProcessed_MPL'].gt(gap[1].matplotlib_date))| \ (hvsrDF['TimesProcessed_MPLEnd'].lt(gap[0].matplotlib_date)).astype(bool)# | \ hvsrDF.set_index('TimesProcessed', inplace=True) hvsr_data['hvsr_windows_df'] = hvsrDF # Remove data set for removal during remove_noise() if 'x_windows_out' in hvsr_data.keys(): if verbose: print("\t\tRemoving Noisy windows from hvsr_windows_df.") hvsr_data = __remove_windows_from_df(hvsr_data, verbose=verbose) #for window in hvsr_data['x_windows_out']: # print(window) # hvsrDF['Use'] = (hvsrDF['TimesProcessed_MPL'][hvsrDF['Use']].lt(window[0]) & hvsrDF['TimesProcessed_MPLEnd'][hvsrDF['Use']].lt(window[0]) )| \ # (hvsrDF['TimesProcessed_MPL'][hvsrDF['Use']].gt(window[1]) & hvsrDF['TimesProcessed_MPLEnd'][hvsrDF['Use']].gt(window[1])).astype(bool) #hvsrDF['Use'] = hvsrDF['Use'].astype(bool) # Create dict entry to keep track of how many outlier hvsr curves are removed # This is a (2-item list with [0]=current number, [1]=original number of curves) hvsr_data['tsteps_used'] = [int(hvsrDF['Use'].sum()), hvsrDF['Use'].shape[0]] #hvsr_data['tsteps_used'] = [hvsr_data['psds']['Z']['times_processed'].shape[0], hvsr_data['psds']['Z']['times_processed'].shape[0]] #hvsr_data['tsteps_used'][0] = hvsr_data['psds']['Z']['current_times_used'].shape[0] hvsr_data = sprit_utils._make_it_classy(hvsr_data) if 'processing_parameters' not in hvsr_data.keys(): hvsr_data['processing_parameters'] = {} hvsr_data['processing_parameters']['generate_psds'] = {} exclude_params_list = ['hvsr_data'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_data['processing_parameters']['generate_psds'][key] = value hvsr_data['processing_status']['generate_psds_status'] = True hvsr_data = sprit_utils._check_processing_status(hvsr_data, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) #for ind, row in hvsrDF.iterrows(): # print(row['psd_values_Z'].shape) if plot_psds: for i, r in hvsrDF.iterrows(): plt.plot(r['psd_values_Z'], c='k', linewidth=0.5) plt.plot(r['psd_values_E'], c='b', linewidth=0.5) plt.plot(r['psd_values_N'], c='r', linewidth=0.5) plt.show() return hvsr_dataCalculate Power Spectral Density (PSD) curves for each channel. Uses the scipy.signal.welch() function to generate PSDs by default, or can use Obspy's PPSD class. Info on Obspy PPSD creation here (if obspy_ppsds=True): https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html
Parameters
hvsr_data:dict, HVSRData object,orHVSRBatch object- Data object containing all the parameters and other data of interest (stream and paz, for example)
window_length:float- Length of the window, in seconds, to use for each PSD calculation. Defaults to 30.0.
overlap_pct:float- Percentage (should be 0-1) for overlapping each window used for PSD calculation. Defaults to 0.5.
window_type:str- Type of window to use. This is passed to the window parameter of the scipy.signal.welch function
window_length_method:str = {'length', 'number'}- Whether the window length should be a measure of length in seconds or number of windows. If number of windows uses integer value.
remove_response:bool, default=False- Whether to remove the instrument response from the data traces before calculating PSD data. If True, the appropriate metadata (i.e., obspy.Inventory object) must be attached to the stream and should be stored in the 'inv' attribute of hvsr_data.
skip_on_gaps:bool, default=True- Whether to skip data gaps when processing windows. This is passed to the skip_on_gaps parameter of the Obspy PPSD class.
num_freq_bins:int, default=512- Number of frequency bins to use. When using the default (i.e., scipy.signal.welch) PSD function, the frequency bins are created manually for processing.
obspy_ppsds:bool, default=False- Whether to use the Obspy PPSD class.
azimuthal_psds:bool, default=False- Whether to generate PPSDs for azimuthal data
verbose:bool, default=True- Whether to print inputs and results to terminal
plot_psds:bool, default=False- Whether to show a plot of the psds here.
**obspy_ppsd_kwargs:dict- Dictionary with keyword arguments that are passed directly to obspy.signal.PPSD. If the following keywords are not specified, their defaults are amended in this function from the obspy defaults for its PPSD function. Specifically: - ppsd_length defaults to 30 (seconds) here instead of 3600 - skip_on_gaps defaults to True instead of False - period_step_octaves defaults to 0.03125 instead of 0.125
Returns
psds : HVSRData object Dictionary containing entries with psds for each channelSee Also
[scipy.signal.welch](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html)[obspy.signal.spectral_estimation.PPSD](https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html) def get_metadata(params,
write_path='',
update_metadata=True,
source=None,
verbose=False,
**read_inventory_kwargs)-
Expand source code
def get_metadata(params, write_path='', update_metadata=True, source=None, verbose=False, **read_inventory_kwargs): """Get metadata and calculate or get paz parameter needed for PSD Adds an obspy.Inventory object to the "inv" attribute or key of params Parameters ---------- params : dict Dictionary containing all the input and other parameters needed for processing Ouput from input_params() function write_path : str String with output filepath of where to write updated inventory or metadata file If not specified, does not write file update_metadata : bool Whether to update the metadata file itself, or just read as-is. If using provided raspberry shake metadata file, select True. source : str, default=None This passes the source variable value to _read_RS_metadata. It is expected that this is passed directly from the source parameter of sprit.fetch_data() Returns ------- params : dict Modified input dictionary with additional key:value pair containing paz dictionary (key = "paz") """ invPath = params['metadata'] raspShakeInstNameList = ['raspberry shake', 'shake', 'raspberry', 'rs', 'rs3d', 'rasp. shake', 'raspshake', 'raspberry shake 3d'] trominoNameList = ['tromino', 'trom', 'trm', 't'] if str(params['instrument']).lower() in raspShakeInstNameList: if update_metadata: params = _update_shake_metadata(filepath=invPath, params=params, write_path=write_path, verbose=verbose) params = _read_RS_Metadata(params, source=source) elif params['instrument'].lower() in trominoNameList: params['paz'] = {'Z':{}, 'E':{}, 'N':{}} # Initially started here: https://ds.iris.edu/NRL/sensors/Sunfull/RESP.XX.NS721..BHZ.PS-4.5C1_LF4.5_RC3400_RSNone_SG82_STgroundVel tromino_paz = { 'zeros': [-3.141592653589793/2-0j, -3.141592653589793/2-0j], 'poles': [(17-24j), (17+24j)], 'stage_gain':100, 'stage_gain_frequency':10, 'normalization_frequency':5, 'normalization_factor':1} params['paz']['Z'] = params['paz']['E'] = params['paz']['N'] = tromino_paz tromChaResponse = obspy.core.inventory.response.Response().from_paz(**tromino_paz) obspyStartDate = obspy.UTCDateTime(1900,1,1) obspyNow = obspy.UTCDateTime.now() # Update location code to match partition if type(params['station']) is int or str(params['station']).isdigit(): params['location'] = str(params['station']) # Create channel objects to be used in inventory channelObj_Z = obspy.core.inventory.channel.Channel(code='EHZ', location_code=params['location'], latitude=params['params']['latitude'], longitude=params['params']['longitude'], elevation=params['params']['elevation'], depth=params['params']['depth'], azimuth=0, dip=90, start_date=obspyStartDate, end_date=obspyNow, response=tromChaResponse) channelObj_E = obspy.core.inventory.channel.Channel(code='EHE', location_code=params['location'], latitude=params['params']['latitude'], longitude=params['params']['longitude'], elevation=params['params']['elevation'], depth=params['params']['depth'], azimuth=90, dip=0, start_date=obspyStartDate, end_date=obspyNow, response=tromChaResponse) channelObj_N = obspy.core.inventory.channel.Channel(code='EHN', location_code=params['location'], latitude=params['params']['latitude'], longitude=params['params']['longitude'], elevation=params['params']['elevation'], depth=params['params']['depth'], azimuth=0, dip=0, start_date=obspyStartDate, end_date=obspyNow, response=tromChaResponse) # Create site object for inventory siteObj = obspy.core.inventory.util.Site(name=params['params']['site'], description=None, town=None, county=None, region=None, country=None) # Create station object for inventory stationObj = obspy.core.inventory.station.Station(code='TRMNO', latitude=params['params']['latitude'], longitude=params['params']['longitude'], elevation=params['params']['elevation'], channels=[channelObj_Z, channelObj_E, channelObj_N], site=siteObj, vault=None, geology=None, equipments=None, operators=None, creation_date=obspyStartDate, termination_date=obspy.UTCDateTime(2100,1,1), total_number_of_channels=3, selected_number_of_channels=3, description='Estimated data for Tromino, this is NOT from the manufacturer', comments=None, start_date=obspyStartDate, end_date=obspyNow, restricted_status=None, alternate_code=None, historical_code=None, data_availability=obspy.core.inventory.util.DataAvailability(obspyStartDate, obspy.UTCDateTime.now()), identifiers=None, water_level=None, source_id=None) # Create network object for inventory network = [obspy.core.inventory.network.Network(code='AM', stations=[stationObj], total_number_of_stations=None, selected_number_of_stations=None, description=None, comments=None, start_date=obspyStartDate, end_date=obspyNow, restricted_status=None, alternate_code=None, historical_code=None, data_availability=None, identifiers=None, operators=None, source_id=None)] params['inv'] = obspy.Inventory(networks=network) else: if not invPath: pass #if invPath is None elif not pathlib.Path(invPath).exists() or invPath == '': warnings.warn(f"The metadata parameter was not specified correctly. Returning original params value {params['metadata']}") readInvKwargs = {} argspecs = inspect.getfullargspec(obspy.read_inventory) for argName in argspecs[0]: if argName in read_inventory_kwargs.keys(): readInvKwargs[argName] = read_inventory_kwargs[argName] readInvKwargs['path_or_file_object'] = invPath params['inv'] = obspy.read_inventory(invPath) if 'params' in params.keys(): params['params']['inv'] = params['inv'] return paramsGet metadata and calculate or get paz parameter needed for PSD Adds an obspy.Inventory object to the "inv" attribute or key of params
Parameters
params:dict- Dictionary containing all the input and other parameters needed for processing Ouput from input_params() function
write_path:str- String with output filepath of where to write updated inventory or metadata file If not specified, does not write file
update_metadata:bool- Whether to update the metadata file itself, or just read as-is. If using provided raspberry shake metadata file, select True.
source:str, default=None- This passes the source variable value to _read_RS_metadata. It is expected that this is passed directly from the source parameter of sprit.fetch_data()
Returns
params:dict- Modified input dictionary with additional key:value pair containing paz dictionary (key = "paz")
def get_report(hvsr_results,
report_formats=['print', 'table', 'plot', 'html', 'pdf'],
azimuth='HV',
plot_type='HVSR p ann COMP+ p ann SPEC p ann',
plot_engine='matplotlib',
show_print_report=True,
show_table_report=False,
show_plot_report=False,
show_html_report=False,
show_pdf_report=True,
suppress_report_outputs=False,
show_report_outputs=False,
csv_handling='append',
report_export_format=None,
report_export_path=None,
verbose=False,
**kwargs)-
Expand source code
def get_report(hvsr_results, report_formats=['print', 'table', 'plot', 'html', 'pdf'], azimuth='HV', plot_type=DEFAULT_PLOT_STR, plot_engine='matplotlib', show_print_report=True, show_table_report=False, show_plot_report=False, show_html_report=False, show_pdf_report=True, suppress_report_outputs=False, show_report_outputs=False, csv_handling='append', report_export_format=None, report_export_path=None, verbose=False, **kwargs): """Generate and/or print and/or export a report of the HVSR analysis in a variety of formats. Formats include: * 'print': A (monospace) text summary of the HVSR results * 'table': A pandas.DataFrame summary of the HVSR Results. This is useful for copy/pasting directly into a larger worksheet. * 'plot': A plot summary of the HVSR results, generated using the plot_hvsr() function. * 'html': An HTML document/text of the HVSR results. This includes the table, print, and plot reports in one document. * 'pdf': A PDF document showing the summary of the HVSR Results. The PDF report is simply the HTML report saved to an A4-sized PDF document. Parameters ---------- hvsr_results : dict Dictionary containing all the information about the processed hvsr data report_formats : {'table', 'print', plot} Format in which to print or export the report. The following report_formats return the following items in the following attributes: - 'plot': hvsr_results['Print_Report'] as a str - 'print': hvsr_results['Plot_Report'] - matplotlib.Figure object - 'table': hvsr_results['Table_Report']- pandas.DataFrame object - list/tuple - a list or tuple of the above objects, in the same order they are in the report_formats list - 'html': hvsr_results['HTML_Report'] - a string containing the text for an HTML document - 'pdf': currently does not save to the HVSRData object itself, can only be saved to the disk directly plot_type : str, default = 'HVSR p ann C+ p ann Spec p ann' What type of plot to plot, if 'plot' part of report_formats input azimuth : str, default = 'HV' Which azimuth to plot, by default "HV" which is the main "azimuth" combining the E and N components csv_handling : str, {'append', 'overwrite', 'keep/rename'} How to handle table report outputs if the designated csv output file already exists. By default, appends the new information to the end of the existing file. suppress_report_outputs : bool, default=False If True, only reads output to appropriate attribute of data class (ie, print does not print, only reads text into variable). If False, performs as normal. report_export_format : list or str, default=['pdf'] A string or list of strings indicating which report formats should be exported to disk. report_export_path : None, bool, or filepath, default = None If None or False, does not export; if True, will export to same directory as the input_data parameter in the input_params() function. Otherwise, it should be a string or path object indicating where to export results. May be a file or directory. If a directory is specified, the filename will be "<site_name>_<acq_date>_<UTC start time>-<UTC end time>". The extension/suffix defaults to png for report_formats="plot", csv for 'table', txt for 'print', html for 'html', and pdf for 'pdf.' verbose : bool, default=True Whether to print the results to terminal. This is the same output as report_formats='print', and will not repeat if that is already selected Returns ------- sprit.HVSRData """ orig_args = locals().copy() #Get the initial arguments orig_args['report_formats'] = [str(f).lower() for f in orig_args['report_formats']] update_msg = [] # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_results.keys(): if 'get_report' in hvsr_results['processing_parameters'].keys(): for k, v in hvsr_results['processing_parameters']['get_report'].items(): defaultVDict = dict(zip(inspect.getfullargspec(get_report).args[1:], inspect.getfullargspec(get_report).defaults)) defaultVDict['kwargs'] = {} # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v report_formats = orig_args['report_formats'] azimuth = orig_args['azimuth'] plot_type = orig_args['plot_type'] plot_engine = orig_args['plot_engine'] show_print_report = orig_args['show_print_report'] show_table_report = orig_args['show_table_report'] show_plot_report = orig_args['show_plot_report'] show_html_report = orig_args['show_html_report'] show_pdf_report = orig_args['show_pdf_report'] suppress_report_outputs = orig_args['suppress_report_outputs'] show_report_outputs = orig_args['show_report_outputs'] report_export_format = orig_args['report_export_format'] report_export_path = orig_args['report_export_path'] csv_handling = orig_args['csv_handling'] verbose = orig_args['verbose'] kwargs = orig_args['kwargs'] # Put Processing parameters in hvsr_results immediately (gets used later local function in get_report) hvsr_results['processing_parameters']['get_report'] = {} exclude_params_list = ['hvsr_results'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_results['processing_parameters']['get_report'][key] = value if verbose: print('\nGetting HVSR Report: get_report()') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key == 'params' or isinstance(value, (HVSRData, HVSRBatch)): pass else: print('\t {}={}'.format(key, value)) print() if update_msg != [] and verbose: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) if isinstance(hvsr_results, HVSRBatch): if verbose: print('\nGetting Reports: Running in batch mode') print('\tUsing parameters:') for key, value in orig_args.items(): print(f'\t {key}={value}') print() #If running batch, we'll loop through each site for site_name in hvsr_results.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite individual_params = hvsr_results[site_name] #Get what would normally be the "params" variable for each site args['hvsr_results'] = individual_params #reset the params parameter we originally read in to an individual site params if hvsr_results[site_name]['processing_status']['overall_status']: try: hvsr_results[site_name] = __get_report_batch(**args) #Call another function, that lets us run this function again except: hvsr_results[site_name] = hvsr_results[site_name] else: hvsr_results[site_name] = hvsr_results[site_name] combined_csvReport = pd.DataFrame() for site_name in hvsr_results.keys(): if 'Table_Report' in hvsr_results[site_name].keys(): combined_csvReport = pd.concat([combined_csvReport, hvsr_results[site_name]['Table_Report']], ignore_index=True, join='inner') if report_export_path is not None: if report_export_path is True: if pathlib.Path(hvsr_results['input_params']['input_data']) in sampleFileKeyMap.values(): csvExportPath = pathlib.Path(os.getcwd()) else: csvExportPath = pathlib.Path(hvsr_results['input_params']['input_data']).parent elif pathlib.Path(report_export_path).is_dir(): csvExportPath = report_export_path elif pathlib.Path(report_export_path).is_file(): csvExportPath = report_export_path.parent else: csvExportPath = pathlib.Path(hvsr_results[site_name].input_data) if csvExportPath.is_dir(): pass else: csvExportPath = csvExportPath.parent combined_csvReport.to_csv(csvExportPath, index=False) return hvsr_results if suppress_report_outputs: show_print_report = show_plot_report = show_table_report = show_html_report = show_pdf_report = False elif show_report_outputs: show_print_report = show_plot_report = show_table_report = show_html_report = show_pdf_report = True #if 'BestPeak' in hvsr_results.keys() and 'PassList' in hvsr_results['BestPeak'].keys(): try: curvTestsPassed = (hvsr_results['BestPeak'][azimuth]['PassList']['WinLen'] + hvsr_results['BestPeak'][azimuth]['PassList']['SigCycles']+ hvsr_results['BestPeak'][azimuth]['PassList']['LowCurveStD']) curvePass = curvTestsPassed > 2 #Peak Pass? peakTestsPassed = ( hvsr_results['BestPeak'][azimuth]['PassList']['ProminenceLow'] + hvsr_results['BestPeak'][azimuth]['PassList']['ProminenceHi']+ hvsr_results['BestPeak'][azimuth]['PassList']['AmpClarity']+ hvsr_results['BestPeak'][azimuth]['PassList']['FreqStability']+ hvsr_results['BestPeak'][azimuth]['PassList']['LowStDev_Freq']+ hvsr_results['BestPeak'][azimuth]['PassList']['LowStDev_Amp']) peakPass = peakTestsPassed >= 5 except Exception as e: errMsg= 'No BestPeak identified. Check peak_freq_range or hvsr_band or try to remove bad noise windows using remove_noise() or change processing parameters in process_hvsr() or generate_psds(). Otherwise, data may not be usable for HVSR.' print(errMsg) print(e) plotString_noBestPeak = 'HVSR t all C+ t SPEC' hvsr_results['Plot_Report'] = plot_hvsr(hvsr_results, plot_type=plotString_noBestPeak, azimuth=azimuth, return_fig=True) return hvsr_results #raise RuntimeError('No BestPeak identified. Check peak_freq_range or hvsr_band or try to remove bad noise windows using remove_noise() or change processing parameters in process_hvsr() or generate_psds(). Otherwise, data may not be usable for HVSR.') # Figure out which reports will be used, and format them correctly if isinstance(report_formats, (list, tuple)): report_formats = [str(rf).lower() for rf in report_formats] else: #We will use a loop later even if it's just one report type, so reformat to prepare for for loop allList = [':', 'all'] if report_formats.lower() in allList: report_formats = ['print', 'table', 'plot', 'html', 'pdf'] else: report_formats = [str(report_formats).lower()] # Format the export formats correctly if isinstance(report_export_format, (list, tuple)): pass elif report_export_format is None: pass else: # We will use list methods later even if it's just one report type, so reformat as list allList = [':', 'all'] if report_export_format.lower() in allList: report_export_format = ['print', 'table', 'plot', 'html', 'pdf'] else: report_export_format = [report_export_format] # Put print first to get results immediatley while plots and others are created if 'print' in report_formats and report_formats[0] != 'print': report_formats = ['table', 'plot', 'print', 'html', 'pdf'] report_formats.pop(report_formats.index('print')) report_formats.insert(0, 'print') for i, rep_form in enumerate(report_formats): if isinstance(report_export_path, (list, tuple)): if not isinstance(report_formats, (list, tuple)): warnings.warn('report_export_path is a list/tuple and report_formats is not. This may result in unexpected behavior.') if isinstance(report_formats, (list, tuple)) and isinstance(report_export_path, (list, tuple)) and len(report_formats) != len(report_export_path): warnings.warn('report_export_path and report_formats are both lists or tuples, but they are not the same length. This may result in unexpected behavior.') exp_path = report_export_path[i] else: exp_path = report_export_path if report_export_format is None: report_export_format = '' # Print_Report if rep_form == 'print': verbose_print = verbose if show_print_report: verbose_print = True # Generates print report and saves to hvsr_results["Print_Report"] hsvr_results = _generate_print_report(hvsr_results, azimuth = azimuth, show_print_report = True, verbose=verbose_print) if 'print' in report_export_format: if exp_path is None: print_exp_path = exp_path else: print_exp_path = pathlib.Path(exp_path).with_suffix('.txt') export_report(hvsr_results, azimuth=azimuth, report_export_format='print', report_export_path=print_exp_path, show_report = False, # If report is to be shown, done in previous step verbose = verbose_print) # Table_Report elif rep_form == 'table': verbose_table = verbose if show_table_report: verbose_table = True hsvr_results = _generate_table_report(hvsr_results, azimuth=azimuth, show_table_report=show_table_report, verbose=verbose_table) if 'table' in report_export_format: if exp_path is None: table_exp_path = exp_path else: table_exp_path = pathlib.Path(exp_path).with_suffix('.csv') export_report(hvsr_results, azimuth=azimuth, report_export_format='table', report_export_path=table_exp_path, csv_handling=csv_handling, show_report = False, # If report is to be shown, done in previous step verbose = verbose_table) # Plot_Report elif rep_form == 'plot': plot_hvsr_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(plot_hvsr).parameters.keys())} if 'plot_type' in plot_hvsr_kwargs.keys(): plot_hvsr_kwargs.pop('plot_type') if 'plot_engine' in plot_hvsr_kwargs.keys(): plot_hvsr_kwargs.pop('plot_engine') fig = plot_hvsr(hvsr_results, plot_type=plot_type, azimuth=azimuth, plot_engine=plot_engine, show_plot=show_plot_report, return_fig=True) expFigAx = fig if 'plot' in report_export_format: export_report(hvsr_results=hvsr_results, report_export_path=report_export_path, report_export_format='plot') #hvsr_results['BestPeak'][azimuth]['Report']['Plot_Report'] = fig hvsr_results['Plot_Report'] = fig if show_plot_report:#'show_plot' in plot_hvsr_kwargs.keys() and plot_hvsr_kwargs['show_plot'] is False: if not verbose: if str(plot_engine).lower(): plt.show() else: fig.show() else: print('\nPlot of data report:') if str(plot_engine).lower(): plt.show() else: fig.show() else: if verbose: print("\n\tPlot of data report created and saved in ['Plot_Report'] attribute") # HTML_Report elif rep_form == 'html': verbose_html = verbose if verbose or show_html_report: verbose_html = True hvsr_results = _generate_html_report(hsvr_results, show_html_report=show_html_report, verbose=verbose_html) if 'html' in report_export_format: if exp_path is None: html_exp_path = exp_path else: html_exp_path = pathlib.Path(exp_path).with_suffix('.html') export_report(hvsr_results, azimuth=azimuth, report_export_format='html', report_export_path=html_exp_path, show_report = False, # If report is to be shown, done in previous step verbose = verbose_html) # PDF_Report elif rep_form == 'pdf': verbose_pdf = verbose # Don't repeat html printing, etc. if already done if 'html' in report_formats: show_html_report = False else: show_html_report = show_html_report if exp_path is None: pdf_exp_path = exp_path else: pdf_exp_path = pathlib.Path(exp_path) hvsr_results = _generate_pdf_report(hvsr_results, pdf_report_filepath=pdf_exp_path, show_pdf_report=show_pdf_report, show_html_report=show_html_report, verbose=verbose_pdf) return hvsr_resultsGenerate and/or print and/or export a report of the HVSR analysis in a variety of formats.
Formats include: * 'print': A (monospace) text summary of the HVSR results * 'table': A pandas.DataFrame summary of the HVSR Results. This is useful for copy/pasting directly into a larger worksheet. * 'plot': A plot summary of the HVSR results, generated using the plot_hvsr() function. * 'html': An HTML document/text of the HVSR results. This includes the table, print, and plot reports in one document. * 'pdf': A PDF document showing the summary of the HVSR Results. The PDF report is simply the HTML report saved to an A4-sized PDF document.
Parameters
hvsr_results:dict- Dictionary containing all the information about the processed hvsr data
report_formats:{'table', 'print', plot}- Format in which to print or export the report. The following report_formats return the following items in the following attributes: - 'plot': hvsr_results['Print_Report'] as a str - 'print': hvsr_results['Plot_Report'] - matplotlib.Figure object - 'table': hvsr_results['Table_Report']- pandas.DataFrame object - list/tuple - a list or tuple of the above objects, in the same order they are in the report_formats list - 'html': hvsr_results['HTML_Report'] - a string containing the text for an HTML document - 'pdf': currently does not save to the HVSRData object itself, can only be saved to the disk directly
plot_type:str, default= 'HVSR p ann C+ p ann Spec p ann'- What type of plot to plot, if 'plot' part of report_formats input
azimuth:str, default= 'HV'- Which azimuth to plot, by default "HV" which is the main "azimuth" combining the E and N components
csv_handling:str, {'append', 'overwrite', 'keep/rename'}- How to handle table report outputs if the designated csv output file already exists. By default, appends the new information to the end of the existing file.
suppress_report_outputs:bool, default=False- If True, only reads output to appropriate attribute of data class (ie, print does not print, only reads text into variable). If False, performs as normal.
report_export_format:listorstr, default=['pdf']- A string or list of strings indicating which report formats should be exported to disk.
report_export_path:None, bool,orfilepath, default= None- If None or False, does not export; if True, will export to same directory as the input_data parameter in the input_params() function.
Otherwise, it should be a string or path object indicating where to export results. May be a file or directory.
If a directory is specified, the filename will be
"
- ". The extension/suffix defaults to png for report_formats="plot", csv for 'table', txt for 'print', html for 'html', and pdf for 'pdf.' verbose:bool, default=True- Whether to print the results to terminal. This is the same output as report_formats='print', and will not repeat if that is already selected
Returns
def gui(kind: str = 'browser')-
Expand source code
def gui(kind: str = 'browser'): """Function to open a graphical user interface (gui) Parameters ---------- kind : str, optional What type of gui to open: * "browser" or "default" opens browser interface (using streamlit) * "widget" opens jupyter widget (using ipywidgets) * "window" opens windowed gui (using tkinter) """ browserList = ['browser', 'streamlit', 'default', 'd', 'b', 's'] windowList = ['windowed', 'window', 'tkinter', 'tk', 't', 'win'] widgetList = ['widget', 'jupyter', 'notebook', 'nb'] liteList = ['lite', 'light', 'basic', 'l'] if kind.lower() in browserList: import subprocess streamlitPath = pathlib.Path(__file__).parent.joinpath("sprit_streamlit_ui.py") cmd = ['streamlit', 'run', streamlitPath.as_posix()] #subprocess.run(cmd) import sys from streamlit.web import cli as stcli import streamlit import sys import subprocess import tempfile temp_dir = tempfile.TemporaryDirectory() def run_streamlit_app(path_dir): temp_dir = tempfile.TemporaryDirectory() # create a temporary directory fpathList = ['sprit_hvsr.py', 'sprit_tkinter_ui.py', 'sprit_jupyter_ui.py', 'sprit_utils.py', 'sprit_plot.py', '__init__.py', 'sprit_streamlit_ui.py'] currDir = os.path.dirname(os.path.abspath(__file__)) for fpath in fpathList: temp_file_path = os.path.join(temp_dir.name, fpath) with open(pathlib.Path(currDir).joinpath(fpath), 'r') as cf: scriptText = cf.read() # write the streamlit app code to a Python script in the temporary directory with open(temp_file_path, 'w') as f: f.write(scriptText) # execute the streamlit app try: # execute the streamlit app subprocess.run( ['streamlit', "run", temp_file_path], stderr=subprocess.DEVNULL ) except KeyboardInterrupt: pass # clean up the temporary directory when done temp_dir.cleanup() #with open(streamlitPath.parent.as_posix(), 'r') as file: # appText = file.read() run_streamlit_app(pathlib.Path(__name__).parent) #streamlit.web.bootstrap.run(streamlitPath.as_posix(), '', [], []) #process = subprocess.Popen(["streamlit", "run", os.path.join( # 'application', 'main', 'services', 'streamlit_app.py')]) elif kind.lower() in windowList: #guiPath = pathlib.Path(os.path.realpath(__file__)) try: import tkinter as tk from sprit.sprit_tkinter_ui import SPRIT_App except: if sys.platform == 'linux': raise ImportError('The SpRIT graphical interface uses tkinter, which ships with python but is not pre-installed on linux machines. Use "apt-get install python-tk" or "apt-get install python3-tk" to install tkinter. You may need to use the sudo command at the start of those commands.') else: print("Tkinter may not be installed on your system, or is not functioning correctly. Please download and install tkinter, or use another interface.") def on_gui_closing(): plt.close('all') gui_root.quit() gui_root.destroy() if sys.platform == 'linux': if not pathlib.Path("/usr/share/doc/python3-tk").exists(): warnings.warn('The SpRIT graphical interface uses tkinter, which ships with python but is not pre-installed on linux machines. Use "apt-get install python-tk" or "apt-get install python3-tk" to install tkinter. You may need to use the sudo command at the start of those commands.') gui_root = tk.Tk() try: try: icon_path = pathlib.Path(str(importlib.resources.files('sprit'))).joinpath('resources').joinpath("icon").joinpath('sprit_icon_alpha.ico') gui_root.iconbitmap(icon_path.as_posix()) except: icon_path = pathlib.Path(str(importlib.resources.files('sprit'))).joinpath('resources').joinpath("icon").joinpath('sprit_icon.png') gui_root.iconphoto(False, tk.PhotoImage(file=icon_path.as_posix())) except Exception as e: print("ICON NOT LOADED, still opening GUI") gui_root.resizable(True, True) spritApp = SPRIT_App(master=gui_root) # Open the app with a tk.Tk root gui_root.protocol("WM_DELETE_WINDOW", on_gui_closing) gui_root.mainloop() # Run the main loop elif kind.lower() in widgetList: try: sprit_jupyter_UI.create_jupyter_ui() except Exception as e: if hasattr(e, 'message'): errMsg = e.message else: errMsg = e print(errMsg) raise e elif kind.lower() in liteList: print("Lite GUI is not currently supported")Function to open a graphical user interface (gui)
Parameters
kind:str, optional- What type of gui to open: * "browser" or "default" opens browser interface (using streamlit) * "widget" opens jupyter widget (using ipywidgets) * "window" opens windowed gui (using tkinter)
def import_data(import_filepath, data_format='gzip', show_data=True)-
Expand source code
def import_data(import_filepath, data_format='gzip', show_data=True): """Function to import .hvsr (or other extension) data exported using export_hvsr() function Parameters ---------- import_filepath : str or path object Filepath of file created using export_hvsr() function. This is usually a pickle file with a .hvsr extension data_format : str, default='pickle' Type of format data is in. Currently, only 'pickle' supported. Eventually, json or other type may be supported, by default 'pickle'. Returns ------- HVSRData or HVSRBatch object """ sample_list = ['sample', 'sampledata', 's'] if import_filepath in sample_list: import_filepath = RESOURCE_DIR.joinpath(r'sample_data') import_filepath = import_filepath.joinpath(r'SampleHVSRSite01.hvsr') if data_format == 'pickle': with open(import_filepath, 'rb') as f: dataIN = pickle.load(f) elif data_format.lower() == 'dataframe': dataIN = pd.read_csv(import_filepath) else: try: with gzip.open(import_filepath, 'rb') as f: dataIN = pickle.loads(f.read()) except Exception as e: with open(import_filepath, 'rb') as f: dataIN = pickle.load(f) if show_data: print(dataIN) return dataINFunction to import .hvsr (or other extension) data exported using export_hvsr() function
Parameters
import_filepath:strorpath object- Filepath of file created using export_hvsr() function. This is usually a pickle file with a .hvsr extension
data_format:str, default='pickle'- Type of format data is in. Currently, only 'pickle' supported. Eventually, json or other type may be supported, by default 'pickle'.
Returns
def import_settings(settings_import_path, settings_import_type='instrument', verbose=False)-
Expand source code
def import_settings(settings_import_path, settings_import_type='instrument', verbose=False): """Function to import settings, intended for use with settings saved to disk using export_settings Parameters ---------- settings_import_path : pathlike object Filepath to exported settings document settings_import_type : str, optional What type of settings to export (can be 'instrument' or 'all'), by default 'instrument' verbose : bool, optional Whether to print information to terminal, by default False Returns ------- dict A dictionary containing the function names as keys of internal dictionaries, with key:value pairs for each parameter name:value in that function. """ allList = ['all', ':', 'both', 'any'] if settings_import_type.lower() not in allList: # if just a single settings dict is desired with open(settings_import_path, 'r') as f: settingsDict = json.load(f) else: # Either a directory or list if isinstance(settings_import_path, (list, tuple)): for setPath in settings_import_path: pass else: settings_import_path = sprit_utils._checkifpath(settings_import_path) if not settings_import_path.is_dir(): raise RuntimeError(f'settings_import_type={settings_import_type}, but settings_import_path is not list/tuple or filepath to directory') else: instFile = settings_import_path.glob('*.inst') procFile = settings_import_path.glob('*.proc') return settingsDictFunction to import settings, intended for use with settings saved to disk using export_settings
Parameters
settings_import_path:pathlike object- Filepath to exported settings document
settings_import_type:str, optional- What type of settings to export (can be 'instrument' or 'all'), by default 'instrument'
verbose:bool, optional- Whether to print information to terminal, by default False
Returns
dict- A dictionary containing the function names as keys of internal dictionaries, with key:value pairs for each parameter name:value in that function.
def input_params(input_data,
site='HVSRSite',
project=None,
network='AM',
station='NONE',
location='00',
channels=['EHZ', 'EHN', 'EHE'],
acq_date=None,
starttime=None,
endtime=None,
tzone='UTC',
xcoord=-88.229,
ycoord=40.101,
elevation=225,
input_crs='EPSG:4326',
output_crs=None,
elev_unit='meters',
depth=0,
instrument='Seismometer',
metadata=None,
hvsr_band=[0.5, 40],
peak_freq_range=[0.5, 40],
processing_parameters={},
verbose=False)-
Expand source code
def input_params(input_data, site='HVSRSite', project=None, network='AM', station='NONE', location='00', channels=['EHZ', 'EHN', 'EHE'], acq_date = None, starttime = None, endtime = None, tzone = 'UTC', xcoord = -88.229, ycoord = 40.101, elevation = 225, input_crs = 'EPSG:4326', #Default is WGS84,#4269 is NAD83 output_crs = None, elev_unit = 'meters', depth = 0, instrument = "Seismometer", metadata = None, hvsr_band = DEFAULT_BAND, peak_freq_range = DEFAULT_BAND, processing_parameters={}, verbose=False ): """Function for designating input parameters for reading in and processing data Parameters ---------- input_data : str or pathlib.Path object Filepath of data. This can be a directory or file, but will need to match with what is chosen later as the source parameter in fetch_data() site : str, default="HVSR Site" Site name as designated by user for ease of reference. Used for plotting titles, filenames, etc. project : str, default=None A prefix that may be used to create unique identifiers for each site. The identifier created is saved as the ['HVSR_ID'] attribute of the HVSRData object, and is equivalent to the following formatted string: f"{project}-{acq_date.strftime("%Y%m%d")}-{starttime.strftime("%H%M")}-{station}". network : str, default='AM' The network designation of the seismometer. This is necessary for data from Raspberry Shakes. 'AM' is for Amateur network, which fits Raspberry Shakes. station : str, default='None' The station name of the seismometer. This is necessary for data from Raspberry Shakes. location : str, default='00' Location information of the seismometer. channels : list, default=['EHZ', 'EHN', 'EHE'] The three channels used in this analysis, as a list of strings. Preferred that Z component is first, but not necessary acq_date : str, int, date object, or datetime object If string, preferred format is 'YYYY-MM-DD'. If int, this will be interpreted as the time_int of year of current year (e.g., 33 would be Feb 2 of current year) If date or datetime object, this will be the date. Make sure to account for time change when converting to UTC (if UTC is the following time_int, use the UTC time_int). starttime : str, time object, or datetime object, default='00:00:00.00' Start time of data stream. This is necessary for Raspberry Shake data in 'raw' form, or for trimming data. Format can be either 'HH:MM:SS.micros' or 'HH:MM' at minimum. endtime : str, time obejct, or datetime object, default='23:59:99.99' End time of data stream. This is necessary for Raspberry Shake data in 'raw' form, or for trimming data. Same format as starttime. tzone : str or int, default = 'UTC' Timezone of input data. If string, 'UTC' will use the time as input directly. Any other string value needs to be a TZ identifier in the IANA database, a wikipedia page of these is available here: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. If int, should be the int value of the UTC offset (e.g., for American Eastern Standard Time: -5). This is necessary for Raspberry Shake data in 'raw' format. xcoord : float, default=-88.2290526 Longitude (or easting, or, generally, X coordinate) of data point, in Coordinate Reference System (CRS) designated by input_crs. Currently only used in table output, but will likely be used in future for mapping/profile purposes. ycoord : float, default=40.1012122 Latitute (or northing, or, generally, X coordinate) of data point, in Coordinate Reference System (CRS) designated by input_crs. Currently only used in table output, but will likely be used in future for mapping/profile purposes. input_crs : str or other format read by pyproj, default='EPSG:4326' Coordinate reference system of input data, as used by pyproj.CRS.from_user_input() output_crs : str or other format read by pyproj, default='EPSG:4326' Coordinate reference system to which input data will be transformed, as used by pyproj.CRS.from_user_input() elevation : float, default=755 Surface elevation of data point. Not currently used (except in table output), but will likely be used in the future. depth : float, default=0 Depth of seismometer. Not currently used, but will likely be used in the future. instrument : str {'Raspberry Shake', "Tromino"} Instrument from which the data was acquired. metadata : str or pathlib.Path object, default=None Filepath of metadata, in format supported by obspy.read_inventory. If default value of None, will read from resources folder of repository (only supported for Raspberry Shake). hvsr_band : list, default=[0.1, 50] Two-element list containing low and high "corner" frequencies (in Hz) for processing. This can specified again later. peak_freq_range : list or tuple, default=[0.1, 50] Two-element list or tuple containing low and high frequencies (in Hz) that are used to check for HVSR Peaks. This can be a tigher range than hvsr_band, but if larger, it will still only use the hvsr_band range. processing_parameters={} : dict or filepath, default={} If filepath, should point to a .proc json file with processing parameters (i.e, an output from sprit.export_settings()). Note that this only applies to parameters for the functions: 'fetch_data', 'remove_noise', 'generate_psds', 'process_hvsr', 'check_peaks', and 'get_report.' If dictionary, dictionary containing nested dictionaries of function names as they key, and the parameter names/values as key/value pairs for each key. If a function name is not present, or if a parameter name is not present, default values will be used. For example: `{ 'fetch_data' : {'source':'batch', 'data_export_path':"/path/to/trimmed/data", 'data_export_format':'mseed', 'detrend':'spline', 'plot_input_stream':True, 'verbose':False, kwargs:{'kwargskey':'kwargsvalue'}} }` verbose : bool, default=False Whether to print output and results to terminal Returns ------- params : sprit.HVSRData sprit.HVSRData class containing input parameters, including data file path and metadata path. This will be used as an input to other functions. If batch processing, params will be converted to batch type in fetch_data() step. """ orig_args = locals().copy() #Get the initial arguments # Record starting time for this function run start_time = datetime.datetime.now() # Record any updates that are made to input_params based update_msg = [] # Reformat times # Date will come out of this block as a string of datetime.date if acq_date is None: date = str(datetime.datetime.now().date()) elif type(acq_date) is datetime.datetime: date = str(acq_date.date()) elif type(acq_date) is datetime.date: date=str(acq_date) elif type(acq_date) is str: monthStrs = {'jan':1, 'january':1, 'feb':2, 'february':2, 'mar':3, 'march':3, 'apr':4, 'april':4, 'may':5, 'jun':6, 'june':6, 'jul':7, 'july':7, 'aug':8, 'august':8, 'sep':9, 'sept':9, 'september':9, 'oct':10,'october':10, 'nov':11,'november':11, 'dec':12,'december':12} spelledMonth = False for m in monthStrs.keys(): acq_date = acq_date.lower() if m in acq_date: spelledMonth = True break if spelledMonth is not False: month = monthStrs[m] if '/' in acq_date: sep = '/' elif '.' in acq_date: sep='.' elif ' ' in acq_date: sep = ' ' acq_date = acq_date.replace(',', '') else: sep = '-' acq_date = acq_date.split(sep) if len(acq_date[2]) > 2: #American format date = '{}-{}-{}'.format(acq_date[2], acq_date[0], acq_date[1]) else: #international format, one we're going to use date = '{}-{}-{}'.format(acq_date[0], acq_date[1], acq_date[2]) elif type(acq_date) is int: year=datetime.datetime.today().year date = str((datetime.datetime(year, 1, 1) + datetime.timedelta(acq_date - 1)).date()) # Starttime will be standardized as string, then converted to UTCDateTime # If not specified, will be set to 00:00 of current UTC date if starttime is None: starttime = obspy.UTCDateTime(NOWTIME.year, NOWTIME.month, NOWTIME.day, 0, 0, 0, 0) elif type(starttime) is str: if 'T' in starttime: #date=starttime.split('T')[0] starttime = starttime.split('T')[1] else: pass #starttime = date+'T'+starttime elif isinstance(starttime, datetime.datetime): starttime = starttime.time() elif type(starttime) is datetime.time(): starttime = str(starttime) if not isinstance(starttime, obspy.UTCDateTime): starttime = str(date)+"T"+str(starttime) starttime = obspy.UTCDateTime(sprit_utils._format_time(starttime, tzone=tzone)) if not isinstance(orig_args['starttime'], obspy.UTCDateTime) or starttime != orig_args['starttime']: update_msg.append(f"\t\tstarttime was updated from {orig_args['starttime']} to {starttime}") # endtime will be standardized as string, then converted to UTCDateTime # If not specified, will be set to 23:59:59.999999 of current UTC date if endtime is None: endtime = obspy.UTCDateTime(NOWTIME.year, NOWTIME.month, NOWTIME.day, 23, 59, 59, 999999) elif type(endtime) is str: if 'T' in endtime: date=endtime.split('T')[0] endtime = endtime.split('T')[1] elif type(endtime) is datetime.datetime: date = str(endtime.date()) endtime = str(endtime.time()) elif type(endtime) is datetime.time(): endtime = str(endtime) if not isinstance(endtime, obspy.UTCDateTime): endtime = str(date)+"T"+str(endtime) endtime = obspy.UTCDateTime(sprit_utils._format_time(endtime, tzone=tzone)) if not isinstance(orig_args['starttime'], obspy.UTCDateTime) or starttime != orig_args['starttime']: update_msg.append(f"\t\tendtime was updated from {orig_args['endtime']} to {endtime}") acq_date = datetime.date(year=int(date.split('-')[0]), month=int(date.split('-')[1]), day=int(date.split('-')[2])) if acq_date != orig_args['acq_date']: update_msg.append(f"\t\tacq_date was updated from {orig_args['acq_date']} to {acq_date}") raspShakeInstNameList = ['raspberry shake', 'shake', 'raspberry', 'rs', 'rs3d', 'rasp. shake', 'raspshake'] # If no CRS specified, assume WGS84 if input_crs is None or input_crs == '': if verbose: update_msg.append(f"\t\tNo value specified for input_crs, assuming WGS84 (EPSG:4326)") input_crs = 'EPSG:4326' if output_crs is None: if verbose: update_msg.append(f"\t\tNo value specified for output_crs, using same coordinate system is input_crs: ({input_crs})") output_crs = input_crs if xcoord is None or xcoord == '': xcoord = 0.0 else: xcoord = float(xcoord) if ycoord is None or ycoord == '': ycoord = 0.0 else: ycoord = float(ycoord) # Get CRS Objects input_crs = CRS.from_user_input(input_crs) output_crs = CRS.from_user_input(output_crs) # We always need latitude and longitude, so specify this regadless of in/output crs # Get WGS84 coordinates (needed for inventory) wgs84_crs = CRS.from_user_input('EPSG:4326') wgs84_transformer = Transformer.from_crs(input_crs, wgs84_crs, always_xy=True) xcoord_wgs84, ycoord_wgs84 = wgs84_transformer.transform(xcoord, ycoord) xcoord_wgs84 = round(xcoord_wgs84, 7) ycoord_wgs84 = round(ycoord_wgs84, 7) update_msg.append(f"\t\tLongitude ({xcoord_wgs84}) and Latitude ({ycoord_wgs84}) calculated for compatibility with obspy.") # Get coordinates in CRS specified in output_crs xcoordIN = xcoord ycoordIN = ycoord coord_transformer = Transformer.from_crs(input_crs, output_crs, always_xy=True) xcoord, ycoord = coord_transformer.transform(xcoordIN, ycoordIN) if isinstance(processing_parameters, dict): pass else: processing_parameters = sprit_utils._checkifpath(processing_parameters) processing_parameters = import_settings(processing_parameters, settings_import_type='processing', verbose=verbose) # Get elevation in meters if str(elev_unit).lower() in ['feet', 'foot', 'ft', 'f', 'imperial', 'imp', 'american', 'us']: elevation = elevation * 0.3048 elev_unit = 'meters' update_msg.append(f"\t\t Elevations are automatically converted to meters during processing") update_msg.append(f"\t\t elevation was updated to {elevation} m (from {orig_args['elevation']} ft)") update_msg.append(f"\t\t elev_unit was also updated to {elev_unit} (from {orig_args['elev_unit']})") # Create a unique identifier for each site if project is None: proj_id = '' else: proj_id = str(project)+'-' hvsr_id = f"{proj_id}{acq_date.strftime('%Y%m%d')}-{starttime.strftime('%H%M')}-{station}" update_msg.append(f"\t\thvsr_id generated from input parameters: {hvsr_id}") #Add key/values to input parameter dictionary for use throughout the rest of the package inputParamDict = {'site':site, 'project':project, 'hvsr_id':hvsr_id, 'network':network, 'station':station,'location':location, 'channels':channels, 'net':network,'sta':station, 'loc':location, 'cha':channels, 'instrument':instrument, 'acq_date':acq_date,'starttime':starttime,'endtime':endtime, 'timezone':'UTC', #Will be in UTC by this point 'xcoord_input':xcoordIN, 'ycoord_input': ycoordIN ,'xcoord':xcoord, 'ycoord':ycoord, 'longitude':xcoord_wgs84,'latitude':ycoord_wgs84, 'elevation':elevation, 'elev_unit':elev_unit, 'input_crs':input_crs, 'output_crs':output_crs, 'depth':depth, 'input_data': input_data, 'metadata':metadata, 'hvsr_band':hvsr_band, 'peak_freq_range':peak_freq_range, 'processing_parameters':processing_parameters, 'processing_status':{'input_params_status':True, 'overall_status':True} } #Replace any default parameter settings with those from json file of interest, potentially instrument_settings_dict = {} if pathlib.Path(str(instrument)).exists(): instrument_settings = import_settings(settings_import_path=instrument, settings_import_type='instrument', verbose=verbose) input_params_args = inspect.getfullargspec(input_params).args input_params_args.append('net') input_params_args.append('sta') for k, settings_value in instrument_settings.items(): if k in input_params_args: instrument_settings_dict[k] = settings_value inputParamDict['instrument_settings'] = inputParamDict['instrument'] inputParamDict.update(instrument_settings_dict) if str(instrument).lower() in raspShakeInstNameList: if metadata is None or metadata=='': metadata = pathlib.Path(str(importlib.resources.files('sprit'))).joinpath('resources').joinpath("rs3dv5plus_metadata.inv").as_posix() inputParamDict['metadata'] = metadata for settingName in instrument_settings_dict.keys(): if settingName in inputParamDict.keys(): inputParamDict[settingName] = instrument_settings_dict[settingName] if verbose: print('Gathering input parameters (input_params())') for key, value in inputParamDict.items(): print('\t {}={}'.format(key, value)) print() update_msg.insert(0, '\tThe following parameters were modified from the raw input:') for msg_line in update_msg: print(msg_line) print() #Format everything nicely params = sprit_utils._make_it_classy(inputParamDict) params['processing_status']['input_params_status'] = True params = sprit_utils._check_processing_status(params, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return paramsFunction for designating input parameters for reading in and processing data
Parameters
input_data:strorpathlib.Path object- Filepath of data. This can be a directory or file, but will need to match with what is chosen later as the source parameter in fetch_data()
site:str, default="HVSR Site"- Site name as designated by user for ease of reference. Used for plotting titles, filenames, etc.
project:str, default=None- A prefix that may be used to create unique identifiers for each site. The identifier created is saved as the ['HVSR_ID'] attribute of the HVSRData object, and is equivalent to the following formatted string: f"{project}-{acq_date.strftime("%Y%m%d")}-{starttime.strftime("%H%M")}-{station}".
network:str, default='AM'- The network designation of the seismometer. This is necessary for data from Raspberry Shakes. 'AM' is for Amateur network, which fits Raspberry Shakes.
station:str, default='None'- The station name of the seismometer. This is necessary for data from Raspberry Shakes.
location:str, default='00'- Location information of the seismometer.
channels:list, default=['EHZ', 'EHN', 'EHE']- The three channels used in this analysis, as a list of strings. Preferred that Z component is first, but not necessary
acq_date:str, int, date object,ordatetime object- If string, preferred format is 'YYYY-MM-DD'. If int, this will be interpreted as the time_int of year of current year (e.g., 33 would be Feb 2 of current year) If date or datetime object, this will be the date. Make sure to account for time change when converting to UTC (if UTC is the following time_int, use the UTC time_int).
starttime:str, time object,ordatetime object, default='00:00:00.00'- Start time of data stream. This is necessary for Raspberry Shake data in 'raw' form, or for trimming data. Format can be either 'HH:MM:SS.micros' or 'HH:MM' at minimum.
endtime:str, time obejct,ordatetime object, default='23:59:99.99'- End time of data stream. This is necessary for Raspberry Shake data in 'raw' form, or for trimming data. Same format as starttime.
tzone:strorint, default= 'UTC'- Timezone of input data. If string, 'UTC' will use the time as input directly. Any other string value needs to be a TZ identifier in the IANA database, a wikipedia page of these is available here: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. If int, should be the int value of the UTC offset (e.g., for American Eastern Standard Time: -5). This is necessary for Raspberry Shake data in 'raw' format.
xcoord:float, default=-88.2290526- Longitude (or easting, or, generally, X coordinate) of data point, in Coordinate Reference System (CRS) designated by input_crs. Currently only used in table output, but will likely be used in future for mapping/profile purposes.
ycoord:float, default=40.1012122- Latitute (or northing, or, generally, X coordinate) of data point, in Coordinate Reference System (CRS) designated by input_crs. Currently only used in table output, but will likely be used in future for mapping/profile purposes.
input_crs:strorother format read by pyproj, default='EPSG:4326'- Coordinate reference system of input data, as used by pyproj.CRS.from_user_input()
output_crs:strorother format read by pyproj, default='EPSG:4326'- Coordinate reference system to which input data will be transformed, as used by pyproj.CRS.from_user_input()
elevation:float, default=755- Surface elevation of data point. Not currently used (except in table output), but will likely be used in the future.
depth:float, default=0- Depth of seismometer. Not currently used, but will likely be used in the future.
instrument:str {'Raspberry Shake', "Tromino"}- Instrument from which the data was acquired.
metadata:strorpathlib.Path object, default=None- Filepath of metadata, in format supported by obspy.read_inventory. If default value of None, will read from resources folder of repository (only supported for Raspberry Shake).
hvsr_band:list, default=[0.1, 50]- Two-element list containing low and high "corner" frequencies (in Hz) for processing. This can specified again later.
peak_freq_range:listortuple, default=[0.1, 50]- Two-element list or tuple containing low and high frequencies (in Hz) that are used to check for HVSR Peaks. This can be a tigher range than hvsr_band, but if larger, it will still only use the hvsr_band range.
- processing_parameters={} : dict or filepath, default={}
- If filepath, should point to a .proc json file with processing parameters (i.e, an output from sprit.export_settings()).
- Note that this only applies to parameters for the functions: 'fetch_data', 'remove_noise', 'generate_psds', 'process_hvsr', 'check_peaks', and 'get_report.'
- If dictionary, dictionary containing nested dictionaries of function names as they key, and the parameter names/values as key/value pairs for each key.
- If a function name is not present, or if a parameter name is not present, default values will be used.
- For example:
{ 'fetch_data' : {'source':'batch', 'data_export_path':"/path/to/trimmed/data", 'data_export_format':'mseed', 'detrend':'spline', 'plot_input_stream':True, 'verbose':False, kwargs:{'kwargskey':'kwargsvalue'}} }verbose:bool, default=False- Whether to print output and results to terminal
Returns
params:HVSRData- sprit.HVSRData class containing input parameters, including data file path and metadata path. This will be used as an input to other functions. If batch processing, params will be converted to batch type in fetch_data() step.
def plot_azimuth(hvsr_data,
fig=None,
ax=None,
show_azimuth_peaks=False,
interpolate_azimuths=True,
show_azimuth_grid=False,
show_plot=True,
**plot_azimuth_kwargs)-
Expand source code
def plot_azimuth(hvsr_data, fig=None, ax=None, show_azimuth_peaks=False, interpolate_azimuths=True, show_azimuth_grid=False, show_plot=True, **plot_azimuth_kwargs): """Function to plot azimuths when azimuths are calculated Parameters ---------- hvsr_data : HVSRData or HVSRBatch HVSRData that has gone through at least the sprit.fetch_data() step, and before sprit.generate_psds() show_azimuth_peaks : bool, optional Whether to display the peak value at each azimuth calculated on the chart, by default False interpolate_azimuths : bool, optional Whether to interpolate the azimuth data to get a smoother plot. This is just for visualization, does not change underlying data. It takes a lot of time to process the data, but interpolation for vizualization can happen fairly fast. By default True. show_azimuth_grid : bool, optional Whether to display the grid on the chart, by default False Returns ------- matplotlib.Figure, matplotlib.Axis Figure and axis of resulting azimuth plot """ orig_args = locals().copy() #Get the initial arguments if isinstance(hvsr_data, HVSRBatch): #If running batch, we'll loop through each site for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite individual_params = hvsr_data[site_name] #Get what would normally be the "params" variable for each site args['hvsr_data'] = individual_params #reset the params parameter we originally read in to an individual site params if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_data['Azimuth_Fig'] = __plot_azimuth_batch(**args) #Call another function, that lets us run this function again except: print(f"ERROR: {site_name} will not have azimuths plotted.") elif isinstance(hvsr_data, HVSRData): if fig is None: fig = plt.figure() hvsr_band = hvsr_data.hvsr_band azDataList = [] azExtraDataList = [] for k in sorted(hvsr_data.hvsr_az.keys()): currData = hvsr_data.hvsr_az[k] azDataList.append(currData) azExtraDataList.append(currData) freq = hvsr_data.x_freqs['Z'].tolist()[1:] a = np.deg2rad(np.array(sorted(hvsr_data.hvsr_az.keys())).astype(float)) b = a + np.pi z = np.array(azDataList) z2 =np.array(azExtraDataList) def interp_along_theta(orig_array, orig_ind): newArrayList = [] for a1 in orig_array.T: # Resample the array along the first dimension using numpy.interp newZ = np.interp( np.linspace(np.pi/180, np.pi, 180), # New indices orig_ind, # Original indices a1) newArrayList.append(newZ) return np.array(newArrayList).T if 'plot_type' in plot_azimuth_kwargs.keys(): if 'i' in plot_azimuth_kwargs['plot_type']: interpolate_azimuths = True if '-i' in plot_azimuth_kwargs['plot_type']: interpolate_azimuths = False if interpolate_azimuths: z = interp_along_theta(z, a) z2 = interp_along_theta(z2, a) a = np.linspace(np.deg2rad(1), np.pi, 180) b = (a + np.pi).tolist() a = a.tolist() r, th = np.meshgrid(freq, a) r2, th2 = np.meshgrid(freq, b) # Set up plot if ax is None: ax = plt.subplot(polar=True) plt.title(hvsr_data['site']) else: plt.sca(ax) plt.semilogy() ax.set_theta_zero_location("N") ax.set_theta_direction(-1) plt.xlim([0, np.pi*2]) plt.ylim([hvsr_band[1], hvsr_band[0]]) # Plot data pmesh1 = plt.pcolormesh(th, r, z, cmap = 'jet') pmesh2 = plt.pcolormesh(th2, r2, z2, cmap = 'jet') azList = ['azimuth', 'az', 'a', 'radial', 'r'] azOpts = [] if 'plot_type' in plot_azimuth_kwargs.keys(): if type(plot_azimuth_kwargs['plot_type']) is str: ptList = plot_azimuth_kwargs['plot_type'].split(' ') elif isinstance(plot_azimuth_kwargs['plot_type'], (list, tuple)): ptList = list(plot_azimuth_kwargs['plot_type']) for az in azList: if az in ptList: azOpts = [item.lower() for item in ptList[ptList.index(az)+1:]] if 'p' in azOpts: show_azimuth_peaks = True if 'g' in azOpts: show_azimuth_grid = True if show_azimuth_peaks: peakVals = [] peakThetas = [] for k in sorted(hvsr_data.hvsr_az.keys()): peakVals.append(hvsr_data.BestPeak[k]['f0']) peakThetas.append(int(k)) peakThetas = peakThetas + (180 + np.array(peakThetas)).tolist() peakThetas = np.deg2rad(peakThetas).tolist() peakVals = peakVals + peakVals peakVals.append(peakVals[0]) peakThetas.append(peakThetas[0]+(np.pi*2)) peakThetas.append(peakThetas[1]+(np.pi*2)) peakThetas = (np.convolve(peakThetas, np.ones(2), 'full')/2).tolist()[1:-1] newThetas = [] newVals = [] for i, p in enumerate(peakThetas): newThetas.append(p) newThetas.append(p) if i == 0: newVals.append(peakVals[-1]) newVals.append(peakVals[-1]) else: newVals.append(peakVals[i]) newVals.append(peakVals[i]) newThetas.insert(0, newThetas[-1]) newThetas.pop() newVals.append(newVals[0]) newThetas.append(newThetas[0]) #peakThetas = newThetas #peakVals = newVals if len(peakThetas) >= 20: alphaVal = 0.2 else: alphaVal = 0.9 - (19/28) plt.scatter(peakThetas, peakVals, marker='h', facecolors='none', edgecolors='k', alpha=alphaVal) #plt.plot(a, r, ls='none', color = 'k') if show_azimuth_grid: plt.grid(visible=show_azimuth_grid, which='both', alpha=0.5) plt.grid(visible=show_azimuth_grid, which='major', c='k', linewidth=1, alpha=1) #plt.colorbar(pmesh1) if show_plot: plt.show() hvsr_data['AzimuthFig'] = fig else: warnings.warn(f'hvsr_data must be of type HVSRData or HVSRBatch, not {type(hvsr_data)}') return fig, axFunction to plot azimuths when azimuths are calculated
Parameters
hvsr_data:HVSRDataorHVSRBatch- HVSRData that has gone through at least the sprit.fetch_data() step, and before sprit.generate_psds()
show_azimuth_peaks:bool, optional- Whether to display the peak value at each azimuth calculated on the chart, by default False
interpolate_azimuths:bool, optional- Whether to interpolate the azimuth data to get a smoother plot. This is just for visualization, does not change underlying data. It takes a lot of time to process the data, but interpolation for vizualization can happen fairly fast. By default True.
show_azimuth_grid:bool, optional- Whether to display the grid on the chart, by default False
Returns
matplotlib.Figure, matplotlib.Axis- Figure and axis of resulting azimuth plot
def plot_hvsr(hvsr_data,
plot_type='HVSR p ann COMP+ p ann SPEC p ann',
azimuth='HV',
use_subplots=True,
fig=None,
ax=None,
return_fig=False,
plot_engine='matplotlib',
save_dir=None,
save_suffix='',
show_legend=False,
show_plot=True,
close_figs=False,
clear_fig=True,
**kwargs)-
Expand source code
def plot_hvsr(hvsr_data, plot_type=DEFAULT_PLOT_STR, azimuth='HV', use_subplots=True, fig=None, ax=None, return_fig=False, plot_engine='matplotlib', save_dir=None, save_suffix='', show_legend=False, show_plot=True, close_figs=False, clear_fig=True,**kwargs): """Function to plot HVSR data Parameters ---------- hvsr_data : dict Dictionary containing output from process_hvsr function plot_type : str or list, default = 'HVSR ann p C+ ann p SPEC ann p' The plot_type of plot(s) to plot. If list, will plot all plots listed - 'HVSR' - Standard HVSR plot, including standard deviation. Options are included below: - 'p' shows a vertical dotted line at frequency of the "best" peak - 'ann' annotates the frequency value of of the "best" peak - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified) - 't' shows the H/V curve for all time windows - 'tp' shows all the peaks from the H/V curves of all the time windows - 'fr' shows the window within which SpRIT will search for peak frequencies, as set by peak_freq_range - 'test' shows a visualization of the results of the peak validity test(s). Examples: - 'tests' visualizes the results of all the peak tests (not the curve tests) - 'test12' shows the results of tests 1 and 2. - Append any number 1-6 after 'test' to show a specific test result visualized - 'COMP' - plot of the PPSD curves for each individual component ("C" also works) - '+' (as a suffix in 'C+' or 'COMP+') plots C on a plot separate from HVSR (C+ is default, but without + will plot on the same plot as HVSR) - 'p' shows a vertical dotted line at frequency of the "best" peak - 'ann' annotates the frequency value of of the "best" peak - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified) - 't' shows the H/V curve for all time windows - 'SPEC' - spectrogram style plot of the H/V curve over time - 'p' shows a horizontal dotted line at the frequency of the "best" peak - 'ann' annotates the frequency value of the "best" peak - 'all' shows all the peaks identified in check_peaks() - 'tp' shows all the peaks of the H/V curve at all time windows - 'AZ' - circular plot of calculated azimuthal HV curves, similar in style to SPEC plot. - 'p' shows a point at each calculated (not interpolated) azimuth peak - 'g' shows grid lines at various angles - 'i' interpolates so that there is an interpolated azimuth at each degree interval (1 degree step) This is the default, so usually 'i' is not needed. - '-i' prohibits interpolation (only shows the calculated azimuths, as determined by azimuth_angle (default = 30)) azimuth : str, default = 'HV' What 'azimuth' to plot, default being standard N E components combined use_subplots : bool, default = True Whether to output the plots as subplots (True) or as separate plots (False) fig : matplotlib.Figure, default = None If not None, matplotlib figure on which plot is plotted ax : matplotlib.Axis, default = None If not None, matplotlib axis on which plot is plotted return_fig : bool Whether to return figure and axis objects plot_engine : str, default='Matplotlib' Which engine to use for plotting. Both "matplotlib" and "plotly" are acceptable. For shorthand, 'mpl', 'm' also work for matplotlib; 'plty' or 'p' also work for plotly. Not case sensitive. save_dir : str or None Directory in which to save figures save_suffix : str Suffix to add to end of figure filename(s), if save_dir is used show_legend : bool, default=False Whether to show legend in plot show_plot : bool Whether to show plot close_figs : bool, default=False Whether to close figures before plotting clear_fig : bool, default=True Whether to clear figures before plotting **kwargs : keyword arguments Keyword arguments for matplotlib.pyplot Returns ------- fig, ax : matplotlib figure and axis objects Returns figure and axis matplotlib.pyplot objects if return_fig=True, otherwise, simply plots the figures """ orig_args = locals().copy() #Get the initial arguments if isinstance(hvsr_data, HVSRBatch): #If running batch, we'll loop through each site for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite individual_params = hvsr_data[site_name] #Get what would normally be the "params" variable for each site args['hvsr_results'] = individual_params #reset the params parameter we originally read in to an individual site params if hvsr_data[site_name]['processing_status']['overall_status']: try: __hvsr_plot_batch(**args) #Call another function, that lets us run this function again except: print(f"{site_name} not able to be plotted.") return mplList = ['matplotlib', 'mpl', 'm'] plotlyList = ['plotly', 'plty', 'p'] if plot_engine.lower() in plotlyList: plotlyFigure = sprit_plot.plot_results_plotly(hvsr_data, plot_string=plot_type, azimuth=azimuth, results_fig=fig, return_fig=return_fig, use_figure_widget=False, show_results_plot=show_plot) if return_fig: return plotlyFigure else: #plot_engine.lower() in mplList or any other value not in plotly list if clear_fig and fig is not None and ax is not None: #Intended use for tkinter #Clear everything for key in ax: ax[key].clear() for t in fig.texts: del t fig.clear() if close_figs: plt.close('all') # The possible identifiers in plot_type for the different kind of plots hvsrList = ['hvsr', 'hv', 'h'] compList = ['c', 'comp', 'component', 'components'] specgramList = ['spec', 'specgram', 'spectrogram'] azList = ['azimuth', 'az', 'a', 'radial', 'r'] hvsrInd = np.nan compInd = np.nan specInd = np.nan azInd = np.nan plot_type = plot_type.replace(',', '') kList = plot_type.split(' ') for i, k in enumerate(kList): kList[i] = k.lower() # Get the plots in the right order, no matter how they were input (and ensure the right options go with the right plot) # HVSR index if len(set(hvsrList).intersection(kList)): for i, hv in enumerate(hvsrList): if hv in kList: hvsrInd = kList.index(hv) break # Component index #if len(set(compList).intersection(kList)): for i, c in enumerate(kList): if '+' in c and c[:-1] in compList: compInd = kList.index(c) break # Specgram index if len(set(specgramList).intersection(kList)): for i, sp in enumerate(specgramList): if sp in kList: specInd = kList.index(sp) break # Azimuth index if len(set(azList).intersection(kList)): for i, sp in enumerate(azList): if sp in kList: azInd = kList.index(sp) break # Get indices for all plot type indicators indList = [hvsrInd, compInd, specInd, azInd] indListCopy = indList.copy() plotTypeList = ['hvsr', 'comp', 'spec', 'az'] plotTypeOrder = [] plotIndOrder = [] # Get lists with first and last indices of the specifiers for each plot lastVal = 0 while lastVal != 99: firstInd = np.nanargmin(indListCopy) plotTypeOrder.append(plotTypeList[firstInd]) plotIndOrder.append(indList[firstInd]) lastVal = indListCopy[firstInd] indListCopy[firstInd] = 99 #just a high number plotTypeOrder.pop() plotIndOrder[-1] = len(kList) # set up subplots figLayout = 'constrained' figWidth = 6 figHeight = 4 figdpi = 220 for i, p in enumerate(plotTypeOrder): pStartInd = plotIndOrder[i] pEndInd = plotIndOrder[i+1] plotComponents = kList[pStartInd:pEndInd] if use_subplots and i == 0 and fig is None and ax is None: mosaicPlots = [] for pto in plotTypeOrder: if pto == 'az': for i, subp in enumerate(mosaicPlots): if (subp[0].lower() == 'hvsr' or subp[0].lower() == 'comp') and len([item for item in plotTypeOrder if item != "hvsr"]) > 0: mosaicPlots[i].append(subp[0]) mosaicPlots[i].append(subp[0]) else: mosaicPlots[i].append(subp[0]) mosaicPlots[i].append(pto) else: mosaicPlots.append([pto]) perSubPDict = {} if 'az' in plotTypeOrder: perSubPDict['az'] = {'projection':'polar'} fig, ax = plt.subplot_mosaic(mosaicPlots, per_subplot_kw=perSubPDict, layout=figLayout, figsize=(figWidth, figHeight), dpi=figdpi) axis = ax[p] elif use_subplots: with warnings.catch_warnings(): warnings.simplefilter("ignore") #Often warns about xlim when it is not an issue if hasattr(ax, '__len__'):#print(dir(ax), ax, len(ax)) ax[p].clear() axis = ax[p] else: fig, axis = plt.subplots(figsize=(figWidth, figHeight), dpi=figdpi) if p == 'hvsr': kwargs['subplot'] = p fig, ax[p] = _plot_hvsr(hvsr_data, fig=fig, ax=axis, plot_type=plotComponents, azimuth=azimuth, xtype='x_freqs', show_legend=show_legend, axes=ax, **kwargs) elif p == 'comp': plotComponents[0] = plotComponents[0][:-1] kwargs['subplot'] = p minY = 99999 # Start high maxY = -99999 # Start low for key in hvsr_data.psd_raw.keys(): if min(hvsr_data.ppsd_std_vals_m[key]) < minY: minY = min(hvsr_data.ppsd_std_vals_m[key]) if max(hvsr_data.ppsd_std_vals_m[key]) > maxY: maxY = max(hvsr_data.ppsd_std_vals_m[key]) yRange = maxY - minY compYlim = [float(minY - (yRange*0.05)), float(maxY + (yRange * 0.05))] compYlim.reverse() compKwargs = {'ylim':compYlim} compKwargs.update(kwargs) fig, ax[p] = _plot_hvsr(hvsr_data, fig=fig, ax=axis, plot_type=plotComponents, azimuth=azimuth, xtype='x_freqs', show_legend=show_legend, axes=ax, **kwargs) elif p == 'spec': plottypeKwargs = {} for c in plotComponents: plottypeKwargs[c] = True kwargs.update(plottypeKwargs) _plot_specgram_hvsr(hvsr_data, fig=fig, ax=axis, azimuth=azimuth, colorbar=False, **kwargs) elif p == 'az': kwargs['plot_type'] = plotComponents hvsr_data['Azimuth_fig'] = plot_azimuth(hvsr_data, fig=fig, ax=axis, **kwargs) else: warnings.warn('Plot type {p} not recognized', UserWarning) windowsUsedStr = f"{hvsr_data['hvsr_windows_df']['Use'].astype(bool).sum()}/{hvsr_data['hvsr_windows_df'].shape[0]} windows used" winText = fig.text(x=1, y=0.0, s=windowsUsedStr, ha='right', va='bottom', fontsize='xx-small', bbox=dict(facecolor='w', edgecolor=None, linewidth=0, alpha=1, pad=-1)) winText.set_in_layout(False) if len(plotTypeOrder)>1: matplotlib.rcParams["figure.constrained_layout.h_pad"] = 0.075 #if use_subplots: # fig.subplots_adjust()#.set(h_pad=0.075, hspace=-5) if show_plot: fig.canvas.draw() if return_fig: return fig returnFunction to plot HVSR data
Parameters
hvsr_data:dict- Dictionary containing output from process_hvsr function
plot_type:strorlist, default= 'HVSR ann p C+ ann p SPEC ann p'- The plot_type of plot(s) to plot. If list, will plot all plots listed - 'HVSR' - Standard HVSR plot, including standard deviation. Options are included below: - 'p' shows a vertical dotted line at frequency of the "best" peak - 'ann' annotates the frequency value of of the "best" peak - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified) - 't' shows the H/V curve for all time windows - 'tp' shows all the peaks from the H/V curves of all the time windows - 'fr' shows the window within which SpRIT will search for peak frequencies, as set by peak_freq_range - 'test' shows a visualization of the results of the peak validity test(s). Examples: - 'tests' visualizes the results of all the peak tests (not the curve tests) - 'test12' shows the results of tests 1 and 2. - Append any number 1-6 after 'test' to show a specific test result visualized - 'COMP' - plot of the PPSD curves for each individual component ("C" also works) - '+' (as a suffix in 'C+' or 'COMP+') plots C on a plot separate from HVSR (C+ is default, but without + will plot on the same plot as HVSR) - 'p' shows a vertical dotted line at frequency of the "best" peak - 'ann' annotates the frequency value of of the "best" peak - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified) - 't' shows the H/V curve for all time windows - 'SPEC' - spectrogram style plot of the H/V curve over time - 'p' shows a horizontal dotted line at the frequency of the "best" peak - 'ann' annotates the frequency value of the "best" peak - 'all' shows all the peaks identified in check_peaks() - 'tp' shows all the peaks of the H/V curve at all time windows - 'AZ' - circular plot of calculated azimuthal HV curves, similar in style to SPEC plot. - 'p' shows a point at each calculated (not interpolated) azimuth peak - 'g' shows grid lines at various angles - 'i' interpolates so that there is an interpolated azimuth at each degree interval (1 degree step) This is the default, so usually 'i' is not needed. - '-i' prohibits interpolation (only shows the calculated azimuths, as determined by azimuth_angle (default = 30))
azimuth:str, default= 'HV'- What 'azimuth' to plot, default being standard N E components combined
use_subplots:bool, default= True- Whether to output the plots as subplots (True) or as separate plots (False)
fig:matplotlib.Figure, default= None- If not None, matplotlib figure on which plot is plotted
ax:matplotlib.Axis, default= None- If not None, matplotlib axis on which plot is plotted
return_fig:bool- Whether to return figure and axis objects
plot_engine:str, default='Matplotlib'- Which engine to use for plotting. Both "matplotlib" and "plotly" are acceptable. For shorthand, 'mpl', 'm' also work for matplotlib; 'plty' or 'p' also work for plotly. Not case sensitive.
save_dir:strorNone- Directory in which to save figures
save_suffix:str- Suffix to add to end of figure filename(s), if save_dir is used
show_legend:bool, default=False- Whether to show legend in plot
show_plot:bool- Whether to show plot
close_figs:bool, default=False- Whether to close figures before plotting
clear_fig:bool, default=True- Whether to clear figures before plotting
**kwargs:keyword arguments- Keyword arguments for matplotlib.pyplot
Returns
fig,ax:matplotlib figure and axis objects- Returns figure and axis matplotlib.pyplot objects if return_fig=True, otherwise, simply plots the figures
def process_hvsr(hvsr_data,
horizontal_method=None,
smooth=True,
freq_smooth='konno ohmachi',
f_smooth_width=40,
resample=True,
outlier_curve_percentile_threshold=False,
azimuth=None,
verbose=False)-
Expand source code
def process_hvsr(hvsr_data, horizontal_method=None, smooth=True, freq_smooth='konno ohmachi', f_smooth_width=40, resample=True, outlier_curve_percentile_threshold=False, azimuth=None, verbose=False): """Process the input data and get HVSR data This is the main function that uses other (private) functions to do the bulk of processing of the HVSR data and the data quality checks. Parameters ---------- hvsr_data : HVSRData or HVSRBatch Data object containing all the parameters input and generated by the user (usually, during sprit.input_params(), sprit.fetch_data(), sprit.generate_psds() and/or sprit.remove_noise()). horizontal_method : int or str, default=3 Method to use for combining the horizontal components. Default is 3) Geometric Mean 0) (not used) 1) 'Diffuse field assumption' H = √( (eie_E + eie_N) / eie_Z), eie = equal interval energy 2) 'Arithmetic Mean' H ≡ (HN + HE)/2 3) 'Geometric Mean' H ≡ √(HN · HE), recommended by the SESAME project (2004) 4) 'Vector Summation' H ≡ √(HN^2 + HE^2) 5) 'Quadratic Mean' H ≡ √(HN^2 + HE^2)/2 6) 'Maximum Horizontal Value' H ≡ max {HN, HE} 7) 'Minimum Horizontal Valey' H ≡ min {HN, HE} 8) 'Single Azimuth' H = H2·cos(az) + H1·sin(az) smooth : bool, default=True bool or int may be used. If True, default to smooth H/V curve to using savgoy filter with window length of 51 (works well with default resample of 1000 pts) If int, the length of the window in the savgoy filter. freq_smooth : str {'konno ohmachi', 'constant', 'proportional'} Which frequency smoothing method to use. By default, uses the 'konno ohmachi' method. - The Konno & Ohmachi method uses the obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing() function: https://docs.obspy.org/packages/autogen/obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing.html - The constant method uses a window of constant length f_smooth_width - The proportional method uses a window the percentage length of the frequncy steps/range (f_smooth_width now refers to percentage) See here for more information: https://www.geopsy.org/documentation/geopsy/hv-processing.html f_smooth_width : int, default = 40 - For 'konno ohmachi': passed directly to the bandwidth parameter of the konno_ohmachi_smoothing() function, determines the width of the smoothing peak, with lower values resulting in broader peak. Must be > 0. - For 'constant': the size of a triangular smoothing window in the number of frequency steps - For 'proportional': the size of a triangular smoothing window in percentage of the number of frequency steps (e.g., if 1000 frequency steps/bins and f_smooth_width=40, window would be 400 steps wide) resample : bool, default = True bool or int. If True, default to resample H/V data to include 1000 frequency values for the rest of the analysis If int, the number of data points to interpolate/resample/smooth the component psd/HV curve data to. outlier_curve_percentile_threshold : bool, float, default = False If False, outlier curve removal is not carried out here. If True, defaults to 98 (98th percentile). Otherwise, float of percentile used as outlier_threshold of remove_outlier_curve(). azimuth : float, default = None The azimuth angle to use when method is single azimuth. verbose : bool, defualt=False Whether to print output to terminal Returns ------- hvsr_out : dict Dictionary containing all the information about the data, including input parameters """ orig_args = locals().copy() #Get the initial arguments start_time = datetime.datetime.now() # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'process_hvsr' in hvsr_data['processing_parameters'].keys(): update_msg = [] for k, v in hvsr_data['processing_parameters']['process_hvsr'].items(): defaultVDict = dict(zip(inspect.getfullargspec(process_hvsr).args[1:], inspect.getfullargspec(process_hvsr).defaults)) # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v horizontal_method = orig_args['horizontal_method'] smooth = orig_args['smooth'] freq_smooth = orig_args['freq_smooth'] f_smooth_width = orig_args['f_smooth_width'] resample = orig_args['resample'] outlier_curve_percentile_threshold = orig_args['outlier_curve_percentile_threshold'] verbose = orig_args['verbose'] if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']): if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']: pass else: print('\nCalculating Horizontal/Vertical Ratios at all frequencies/time steps (process_hvsr())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key=='hvsr_data': pass else: print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in hvsr_data.keys() and 'process_hvsr' in hvsr_data['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() # PROCESSING STARTS HERE (SEPARATE LOOP FOR BATCH) if isinstance(hvsr_data, HVSRBatch): #If running batch, we'll loop through each site hvsr_out = {} for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_out[site_name] = __process_hvsr_batch(**args) #Call another function, that lets us run this function again except: hvsr_out = hvsr_data hvsr_out[site_name]['processing_status']['process_hvsr_status']=False hvsr_out[site_name]['processing_status']['overall_status'] = False else: hvsr_out = hvsr_data hvsr_out[site_name]['processing_status']['process_hvsr_status']=False hvsr_out[site_name]['processing_status']['overall_status'] = False hvsr_out = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df) hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_out psds = hvsr_data['psds'].copy()#[k]['psd_values'] psds = sprit_utils._check_xvalues(psds) methodList = ['<placeholder_0>', # 0 'Diffuse Field Assumption', # 1 'Arithmetic Mean', # 2 'Geometric Mean', # 3 'Vector Summation', # 4 'Quadratic Mean', # 5 'Maximum Horizontal Value', # 6 'Minimum Horizontal Value', # 7 'Single Azimuth' ] # 8 x_freqs = {} x_periods = {} psdValsTAvg = {} stDev = {} stDevValsP = {} stDevValsM = {} psdRaw={} currTimesUsed={} hvsr_data['hvsr_windows_df']['Use'] = hvsr_data['hvsr_windows_df']['Use'].astype(bool) hvsrDF = hvsr_data['hvsr_windows_df'] def move_avg(y, box_pts): #box = np.ones(box_pts)/box_pts box = np.hanning(box_pts) y_smooth = np.convolve(y, box, mode='same') / sum(box) return y_smooth resampleList = ['period_bin_centers', 'period_bin_left_edges', 'period_bin_right_edges', 'period_xedges', 'psd_frequencies', 'psd_periods'] for k in psds.keys(): #for ppsdk, ppsdv in psds[k].items(): #print(ppsdk, isinstance(ppsdv, np.ndarray)) #input_ppsds = psds[k]['psd_values'] #original, not used anymore input_ppsds = np.stack(hvsrDF['psd_values_'+k].values) #currPPSDs = hvsrDF['psd_values_'+k][hvsrDF['Use']].values #used_ppsds = np.stack(currPPSDs) xValMin_per = np.round(1/hvsr_data['hvsr_band'][1], 4) xValMax_per = np.round(1/hvsr_data['hvsr_band'][0], 4) # If resampling has been selected... if resample is True or type(resample) is int or type(resample) is float: if resample is True: resample = 1000 #Default smooth value # Resample period bin values x_periods[k] = np.logspace(np.log10(xValMin_per), np.log10(xValMax_per), num=resample) if smooth or isinstance(smooth, (int, float)): if smooth: smooth = 51 #Default smoothing window padVal = 25 elif smooth % 2==0: smooth + 1 #Otherwise, needs to be odd padVal = smooth // 2 if padVal % 2 == 0: padVal += 1 # Resample raw ppsd values for i, ppsd_t in enumerate(input_ppsds): if i==0: psdRaw[k] = np.interp(x_periods[k], psds[k]['period_bin_centers'], ppsd_t) if smooth is not False and smooth is not None: padRawKPad = np.pad(psdRaw[k], [padVal, padVal], mode='reflect') #padRawKPadSmooth = scipy.signal.savgol_filter(padRawKPad, smooth, 3) padRawKPadSmooth = move_avg(padRawKPad, smooth) psdRaw[k] = padRawKPadSmooth[padVal:-padVal] else: psdRaw[k] = np.vstack((psdRaw[k], np.interp(x_periods[k], psds[k]['period_bin_centers'], ppsd_t))) if smooth is not False: padRawKiPad = np.pad(psdRaw[k][i], [padVal, padVal], mode='reflect') #padRawKiPadSmooth = scipy.signal.savgol_filter(padRawKiPad, smooth, 3) padRawKiPadSmooth = move_avg(padRawKiPad, smooth) psdRaw[k][i] = padRawKiPadSmooth[padVal:-padVal] # Resample other values for keys in resampleList: if keys == 'period_bin_centers': baseLength = len(psds[k][keys]) if psds[k][keys].ndim == 1: if psds[k][keys].shape[-1] == baseLength: psds[k][keys] = np.logspace(np.log10(min(psds[k][keys])), np.log10(max(psds[k][keys])), num=resample) else: psds[k][keys] = np.logspace(np.log10(min(psds[k][keys])), np.log10(max(psds[k][keys])), num=resample-1) else: arrList = [] for arr in psds[k][keys]: arrList.append(np.logspace(np.log10(min(arr)), np.log10(max(arr)), num=resample)) psds[k][keys] = np.array(arrList) else: #If no resampling desired x_periods[k] = np.array(psds[k]['period_bin_centers'])#[:-1]#np.round([1/p for p in hvsr_data['psds'][k]['period_xedges'][:-1]], 3) # Clean up edge freq. values x_periods[k][0] = 1/hvsr_data['hvsr_band'][1] x_periods[k][-1] = 1/hvsr_data['hvsr_band'][0] # If simple curve smooothing desired if smooth or isinstance(smooth, (int, float)): if smooth: smooth = 51 #Default smoothing window padVal = 25 elif smooth % 2==0: smooth + 1 #Otherwise, needs to be odd padVal = smooth // 2 if padVal % 2 == 0: padVal += 1 for i, ppsd_t in enumerate(input_ppsds): if i == 0: psdRaw[k] = ppsd_t padRawKPad = np.pad(psdRaw[k], [padVal, padVal], mode='reflect') #padRawKPadSmooth = scipy.signal.savgol_filter(padRawKPad, smooth, 3) padRawKPadSmooth = move_avg(padRawKPad, smooth) psdRaw[k] = padRawKPadSmooth[padVal:-padVal] else: psdRaw[k] = np.vstack((psdRaw[k], ppsd_t)) padRawKiPad = np.pad(psdRaw[k][i], [padVal, padVal], mode='reflect') #padRawKiPadSmooth = scipy.signal.savgol_filter(padRawKiPad, smooth, 3) padRawKiPadSmooth = move_avg(padRawKiPad, smooth) psdRaw[k][i] = padRawKiPadSmooth[padVal:-padVal] else: # If no simple curve smoothing psdRaw[k] = np.array(input_ppsds) hvsrDF['psd_values_'+k] = list(psdRaw[k]) use = hvsrDF['Use'].astype(bool) #Get average psd value across time for each channel (used to calc main H/V curve) psdValsTAvg[k] = np.nanmedian(np.stack(hvsrDF[use]['psd_values_'+k]), axis=0) x_freqs[k] = np.array([1/p for p in x_periods[k]]) #np.divide(np.ones_like(x_periods[k]), x_periods[k]) stDev[k] = np.nanstd(np.stack(hvsrDF[use]['psd_values_'+k]), axis=0) stDevValsM[k] = np.array(psdValsTAvg[k] - stDev[k]) stDevValsP[k] = np.array(psdValsTAvg[k] + stDev[k]) currTimesUsed[k] = np.stack(hvsrDF[use]['TimesProcessed_Obspy']) #currTimesUsed[k] = psds[k]['current_times_used'] #original one #print('XFREQS', x_freqs[k].shape) #print('XPERs', x_periods[k].shape) #print('PSDRAW', psdRaw[k].shape) # Get string of horizontal_method type # First, define default if horizontal_method is None: horizontal_method = 3 # Geometric mean is used as default if nothing is specified # If an azimuth has been calculated and it's only one, automatically use the single azimuth method if len(hvsr_data.stream.merge().select(component='R')) == 1: horizontal_method = 8 # Single azimuth # horizontal_method needs to be str or int # First check if input is a string if type(horizontal_method) is str: if horizontal_method.isdigit(): horizontal_method = int(horizontal_method) elif str(horizontal_method).title() in methodList: horizontal_method = methodList.index(horizontal_method.title()) else: print(f"\tHorizontal method {f} not recognized, reverting to default (geometric mean).\n\tMust be one of {methodList}") horizontal_method = 3 # Now, horizontal_method is int no matter how it was entered methodInt = horizontal_method horizontal_method = methodList[horizontal_method] hvsr_data['horizontal_method'] = horizontal_method #This gets the main hvsr curve averaged from all time steps anyK = list(x_freqs.keys())[0] hvsr_curve, hvsr_az, hvsr_tSteps = __get_hvsr_curve(x=x_freqs[anyK], psd=psdValsTAvg, horizontal_method=methodInt, hvsr_data=hvsr_data, azimuth=azimuth, verbose=verbose) origPPSD = hvsr_data['ppsds_obspy'].copy() #print('hvcurv', np.array(hvsr_curve).shape) #print('hvaz', np.array(hvsr_az).shape) #Add some other variables to our output dictionary hvsr_dataUpdate = {'input_params':hvsr_data, 'x_freqs':x_freqs, 'hvsr_curve':hvsr_curve, 'hvsr_az':hvsr_az, 'x_period':x_periods, 'psd_raw':psdRaw, 'current_times_used': currTimesUsed, 'psd_values_tavg':psdValsTAvg, 'ppsd_std':stDev, 'ppsd_std_vals_m':stDevValsM, 'ppsd_std_vals_p':stDevValsP, 'horizontal_method':horizontal_method, 'psds':psds, 'ppsds_obspy':origPPSD, 'tsteps_used': hvsr_data['tsteps_used'].copy(), 'hvsr_windows_df':hvsr_data['hvsr_windows_df'] } hvsr_out = HVSRData(hvsr_dataUpdate) #This is if manual editing was used (should probably be updated at some point to just use masks) if 'x_windows_out' in hvsr_data.keys(): hvsr_out['x_windows_out'] = hvsr_data['x_windows_out'] else: hvsr_out['x_windows_out'] = [] freq_smooth_ko = ['konno ohmachi', 'konno-ohmachi', 'konnoohmachi', 'konnohmachi', 'ko', 'k'] freq_smooth_constant = ['constant', 'const', 'c'] freq_smooth_proport = ['proportional', 'proportion', 'prop', 'p'] #Frequency Smoothing if not freq_smooth: if verbose: warnings.warn('No frequency smoothing is being applied. This is not recommended for noisy datasets.') elif freq_smooth is True or (freq_smooth.lower() in freq_smooth_ko and (not not f_smooth_width and not not freq_smooth)): from obspy.signal import konnoohmachismoothing for k in hvsr_out['psd_raw']: colName = f'psd_values_{k}' psd_data = np.stack(hvsr_out['hvsr_windows_df'][colName]) psd_data = hvsr_out['psd_raw'][k] freqs = hvsr_out['x_freqs'][k] padding_length = int(f_smooth_width) padding_value_R = np.nanmean(psd_data[:,-1*padding_length:]) padding_value_L = np.nanmean(psd_data[:,:padding_length]) # Pad the data to prevent boundary anamolies padded_ppsd_data = np.pad(psd_data, ((0, 0), (padding_length, padding_length)), 'constant', constant_values=(padding_value_L, padding_value_R)) # Pad the frequencies ratio = freqs[1] / freqs[0] # Generate new elements on either side and combine left_padding = [freqs[0] / (ratio ** i) for i in range(padding_length, 0, -1)] right_padding = [freqs[-1] * (ratio ** i) for i in range(1, padding_length + 1)] padded_freqs = np.concatenate([left_padding, freqs, right_padding]) #Filter out UserWarning for just this method, since it throws up a UserWarning that doesn't really matter about dtypes often with warnings.catch_warnings(): #warnings.simplefilter('ignore', category=UserWarning) padded_ppsd_data = padded_ppsd_data.astype(padded_freqs.dtype) # Make them the same datatype padded_ppsd_data = np.round(padded_ppsd_data, 12) # Prevent overflows padded_freqs = np.round(padded_freqs, 9) smoothed_ppsd_data = konnoohmachismoothing.konno_ohmachi_smoothing(padded_ppsd_data, padded_freqs, bandwidth=f_smooth_width, normalize=True) # Only use the original, non-padded data smoothed_ppsd_data = smoothed_ppsd_data[:,padding_length:-1*padding_length] hvsr_out['psd_raw'][k] = smoothed_ppsd_data hvsr_out['hvsr_windows_df'][colName] = pd.Series(list(smoothed_ppsd_data), index=hvsr_out['hvsr_windows_df'].index) elif freq_smooth.lower() in freq_smooth_constant: hvsr_out = __freq_smooth_window(hvsr_out, f_smooth_width, kind_freq_smooth='constant') elif freq_smooth.lower() in freq_smooth_proport: hvsr_out = __freq_smooth_window(hvsr_out, f_smooth_width, kind_freq_smooth='proportional') else: if verbose: warnings.warn(f'You indicated no frequency smoothing should be applied (freq_smooth = {freq_smooth}). This is not recommended for noisy datasets.') #Get hvsr curve from three components at each time step anyK = list(hvsr_out['psd_raw'].keys())[0] if horizontal_method==1 or horizontal_method =='dfa' or horizontal_method =='Diffuse Field Assumption': hvsr_tSteps_az = {} else: hvsr_tSteps = [] hvsr_tSteps_az = {} for tStep in range(len(hvsr_out['psd_raw'][anyK])): tStepDict = {} for k in hvsr_out['psd_raw']: tStepDict[k] = hvsr_out['psd_raw'][k][tStep] hvsr_tstep, hvsr_az_tstep, _ = __get_hvsr_curve(x=hvsr_out['x_freqs'][anyK], psd=tStepDict, horizontal_method=methodInt, hvsr_data=hvsr_out, verbose=verbose) hvsr_tSteps.append(np.float64(hvsr_tstep)) #Add hvsr curve for each time step to larger list of arrays with hvsr_curves for k, v in hvsr_az_tstep.items(): if tStep == 0: hvsr_tSteps_az[k] = [np.float32(v)] else: hvsr_tSteps_az[k].append(np.float32(v)) hvsr_out['hvsr_windows_df']['HV_Curves'] = hvsr_tSteps # Add azimuth HV Curves to hvsr_windows_df, if applicable for key, values in hvsr_tSteps_az.items(): hvsr_out['hvsr_windows_df']['HV_Curves_'+key] = values hvsr_out['ind_hvsr_curves'] = {} for col_name in hvsr_out['hvsr_windows_df']: if "HV_Curves" in col_name: if col_name == 'HV_Curves': colID = 'HV' else: colID = col_name.split('_')[2] hvsr_out['ind_hvsr_curves'][colID] = np.stack(hvsr_out['hvsr_windows_df'][hvsr_out['hvsr_windows_df']['Use']][col_name]) #Initialize array based only on the curves we are currently using indHVCurvesArr = np.stack(hvsr_out['hvsr_windows_df']['HV_Curves'][hvsr_out['hvsr_windows_df']['Use']]) if outlier_curve_percentile_threshold: if outlier_curve_percentile_threshold is True: outlier_curve_percentile_threshold = 98 hvsr_out = remove_outlier_curves(hvsr_out, use_percentile=True, outlier_threshold=outlier_curve_percentile_threshold, use_hv_curves=True, verbose=verbose) hvsr_out['ind_hvsr_stdDev'] = {} for col_name in hvsr_out['hvsr_windows_df'].columns: if "HV_Curves" in col_name: if col_name == 'HV_Curves': keyID = 'HV' else: keyID = col_name.split('_')[2] curr_indHVCurvesArr = np.stack(hvsr_out['hvsr_windows_df'][col_name][hvsr_out['hvsr_windows_df']['Use']]) hvsr_out['ind_hvsr_stdDev'][keyID] = np.nanstd(curr_indHVCurvesArr, axis=0) #Get peaks for each time step hvsr_out['ind_hvsr_peak_indices'] = {} tStepPFDict = {} #hvsr_out['hvsr_windows_df']['CurvesPeakFreqs'] = {} for col_name in hvsr_out['hvsr_windows_df'].columns: if col_name.startswith("HV_Curves"): tStepPeaks = [] if len(col_name.split('_')) > 2: colSuffix = "_"+'_'.join(col_name.split('_')[2:]) else: colSuffix = '_HV' for tStepHVSR in hvsr_out['hvsr_windows_df'][col_name]: tStepPeaks.append(__find_peaks(tStepHVSR)) hvsr_out['ind_hvsr_peak_indices']['CurvesPeakIndices'+colSuffix] = tStepPeaks tStepPFList = [] for tPeaks in tStepPeaks: tStepPFs = [] for pInd in tPeaks: tStepPFs.append(np.float32(hvsr_out['x_freqs'][anyK][pInd])) tStepPFList.append(tStepPFs) tStepPFDict['CurvesPeakFreqs'+colSuffix] = tStepPFList indHVPeakIndsDF = pd.DataFrame(hvsr_out['ind_hvsr_peak_indices'], index=hvsr_out['hvsr_windows_df'].index) tStepPFDictDF = pd.DataFrame(tStepPFDict, index=hvsr_out['hvsr_windows_df'].index) for col in indHVPeakIndsDF.columns: hvsr_out['hvsr_windows_df'][col] = indHVPeakIndsDF.loc[:, col] for col in tStepPFDictDF.columns: hvsr_out['hvsr_windows_df'][col] = tStepPFDictDF.loc[:, col] #Get peaks of main HV curve hvsr_out['hvsr_peak_indices'] = {} hvsr_out['hvsr_peak_indices']['HV'] = __find_peaks(hvsr_out['hvsr_curve']) for k in hvsr_az.keys(): hvsr_out['hvsr_peak_indices'][k] = __find_peaks(hvsr_out['hvsr_az'][k]) #Get frequency values at HV peaks in main curve hvsr_out['hvsr_peak_freqs'] = {} for k in hvsr_out['hvsr_peak_indices'].keys(): hvsrPF = [] for p in hvsr_out['hvsr_peak_indices'][k]: hvsrPF.append(hvsr_out['x_freqs'][anyK][p]) hvsr_out['hvsr_peak_freqs'][k] = np.array(hvsrPF) #Get other HVSR parameters (i.e., standard deviations, etc.) hvsr_out = __gethvsrparams(hvsr_out) #Include the original obspy stream in the output hvsr_out['input_stream'] = hvsr_dataUpdate['input_params']['input_stream'] #input_stream hvsr_out = sprit_utils._make_it_classy(hvsr_out) hvsr_out['processing_status']['process_hvsr_status'] = True if 'processing_parameters' not in hvsr_out.keys(): hvsr_out['processing_parameters'] = {} hvsr_out['processing_parameters']['process_hvsr'] = {} exclude_params_list = ['hvsr_data'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_out['processing_parameters']['process_hvsr'][key] = value if str(horizontal_method) == '8' or horizontal_method.lower() == 'single azimuth': if azimuth is None: azimuth = 90 hvsr_out['single_azimuth'] = azimuth hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_outProcess the input data and get HVSR data
This is the main function that uses other (private) functions to do the bulk of processing of the HVSR data and the data quality checks.
Parameters
- hvsr_data : HVSRData or HVSRBatch
- Data object containing all the parameters input and generated by the user (usually, during sprit.input_params(), sprit.fetch_data(), sprit.generate_psds() and/or sprit.remove_noise()).
- horizontal_method : int or str, default=3
- Method to use for combining the horizontal components. Default is 3) Geometric Mean
- 0) (not used)
- 1) 'Diffuse field assumption' H = √( (eie_E + eie_N) / eie_Z), eie = equal interval energy
- 2) 'Arithmetic Mean' H ≡ (HN + HE)/2
- 3) 'Geometric Mean' H ≡ √(HN · HE), recommended by the SESAME project (2004)
- 4) 'Vector Summation' H ≡ √(HN^2 + HE^2)
- 5) 'Quadratic Mean' H ≡ √(HN^2 + HE^2)/2
- 6) 'Maximum Horizontal Value' H ≡ max
- 7) 'Minimum Horizontal Valey' H ≡ min
- 8) 'Single Azimuth' H = H2·cos(az) + H1·sin(az)
- smooth : bool, default=True
- bool or int may be used.
- If True, default to smooth H/V curve to using savgoy filter with window length of 51 (works well with default resample of 1000 pts)
- If int, the length of the window in the savgoy filter.
freq_smooth:str {'konno ohmachi', 'constant', 'proportional'}- Which frequency smoothing method to use. By default, uses the 'konno ohmachi' method. - The Konno & Ohmachi method uses the obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing() function: https://docs.obspy.org/packages/autogen/obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing.html - The constant method uses a window of constant length f_smooth_width - The proportional method uses a window the percentage length of the frequncy steps/range (f_smooth_width now refers to percentage) See here for more information: https://www.geopsy.org/documentation/geopsy/hv-processing.html
f_smooth_width:int, default= 40-
- For 'konno ohmachi': passed directly to the bandwidth parameter of the konno_ohmachi_smoothing() function, determines the width of the smoothing peak, with lower values resulting in broader peak. Must be > 0.
- For 'constant': the size of a triangular smoothing window in the number of frequency steps
- For 'proportional': the size of a triangular smoothing window in percentage of the number of frequency steps (e.g., if 1000 frequency steps/bins and f_smooth_width=40, window would be 400 steps wide)
- resample : bool, default = True
- bool or int.
- If True, default to resample H/V data to include 1000 frequency values for the rest of the analysis
- If int, the number of data points to interpolate/resample/smooth the component psd/HV curve data to.
outlier_curve_percentile_threshold:bool, float, default= False- If False, outlier curve removal is not carried out here. If True, defaults to 98 (98th percentile). Otherwise, float of percentile used as outlier_threshold of remove_outlier_curve().
azimuth:float, default= None- The azimuth angle to use when method is single azimuth.
verbose:bool, defualt=False- Whether to print output to terminal
Returns
hvsr_out : dict Dictionary containing all the information about the data, including input parameters def read_tromino_files(input_data,
struct_format='H',
tromino_model=None,
diagnose=False,
sampling_rate=None,
set_record_duration=None,
start_byte=24576,
verbose=False,
**kwargs)-
Expand source code
def read_tromino_files(input_data, struct_format='H', tromino_model=None, diagnose=False, sampling_rate=None, set_record_duration=None, start_byte=24576, verbose=False, **kwargs): """Function to read data from tromino. Specifically, this has been lightly tested on Tromino 3G+ and Blue machines Parameters ---------- input_data : str Falseilepath to .trc file struct_format : str, optional This is the format used in the struct module. Usually should not be changed, by default 'H' tromino_model : str, optional Which tromino model is being read. Currently only "Yellow" and "Blue" are supported. If None, assumes "Yellow", by default None. sampling_rate : int, optional Sampling rate (samples per second) used during acquisition. This may later be detected automatically. If None, 128 used, by default None set_record_duration : int, optional Duration of record to set manually in minutes, by default None start_byte : int, optional Used internally, by default 24576 verbose : bool, optional Whether to print information to terminal, by default False Returns ------- obspy.stream.Stream Obspy Stream object with Tromino data """ blueModelList = ['blue', 'blu', 'tromino blu', 'tromino blue'] # Check if input_data is HVSRData object and extract filepath if so input_filepath = input_data if isinstance(input_data, HVSRData): input_filepath = input_data['input_data'] # Allow reading of tromino partition folders (and get the .trc file inside), not just .trc file if pathlib.Path(input_filepath).is_dir(): trDirGlob = pathlib.Path(input_filepath).glob('*trc') for trcFile in trDirGlob: input_data = trcFile if verbose: print(f'\t Input file updated to {pathlib.Path(input_filepath).name} in specified directory.') if str(tromino_model).lower() in blueModelList or 'blue' in str(tromino_model).lower(): tBlueKwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(__read_tromino_data_blue).parameters.keys())} if 'sampling_rate' not in tBlueKwargs: tBlueKwargs['sampling_rate'] = sampling_rate return __read_tromino_data_blue(input_filepath, verbose=False, **tBlueKwargs) else: return __read_tromino_data_yellow(input_data=input_data, sampling_rate=sampling_rate, struct_format=struct_format, tromino_model="3G+",diagnose=diagnose, set_record_duration=set_record_duration, start_byte=start_byte, return_dict=False, verbose=verbose, **kwargs)Function to read data from tromino. Specifically, this has been lightly tested on Tromino 3G+ and Blue machines
Parameters
input_data:str- Falseilepath to .trc file
struct_format:str, optional- This is the format used in the struct module. Usually should not be changed, by default 'H'
tromino_model:str, optional- Which tromino model is being read. Currently only "Yellow" and "Blue" are supported. If None, assumes "Yellow", by default None.
sampling_rate:int, optional- Sampling rate (samples per second) used during acquisition. This may later be detected automatically. If None, 128 used, by default None
set_record_duration:int, optional- Duration of record to set manually in minutes, by default None
start_byte:int, optional- Used internally, by default 24576
verbose:bool, optional- Whether to print information to terminal, by default False
Returns
obspy.stream.Stream- Obspy Stream object with Tromino data
def remove_noise(hvsr_data,
remove_method=None,
processing_window=None,
sat_percent=0.995,
noise_percent=0.8,
sta=2,
lta=30,
stalta_thresh=[8, 16],
std_ratio_thresh=2.0,
std_window_size=20.0,
min_std_win=5.0,
warmup_time=0,
cooldown_time=0,
min_win_size=1,
remove_raw_noise=False,
show_stalta_plot=False,
verbose=False)-
Expand source code
def remove_noise(hvsr_data, remove_method=None, processing_window=None, sat_percent=0.995, noise_percent=0.80, sta=2, lta=30, stalta_thresh=[8, 16], std_ratio_thresh=2.0, std_window_size=20.0, min_std_win=5.0, warmup_time=0, cooldown_time=0, min_win_size=1, remove_raw_noise=False, show_stalta_plot=False, verbose=False): """Function to remove noisy windows from data, using various methods. Methods include - Manual window selection (by clicking on a chart with spectrogram and stream data), - Auto window selection, which does the following two in sequence (these can also be done indepently): - A sta/lta "antitrigger" method (using stalta values to automatically remove triggered windows where there appears to be too much noise) - A noise threshold method, that cuts off all times where the noise threshold equals more than (by default) 80% of the highest amplitude noise sample for the length specified by lta (in seconds) - A saturation threshold method, that cuts off all times where the noise threshold equals more than (by default) 99.5% of the highest amplitude noise sample. Parameters ---------- hvsr_data : dict, obspy.Stream, or obspy.Trace Dictionary containing all the data and parameters for the HVSR analysis remove_method : str, {'auto', 'manual', 'stalta'/'antitrigger', 'saturation threshold', 'noise threshold', 'warmup'/'cooldown'/'buffer'/'warm_cool'} The different methods for removing noise from the dataset. A list of strings will also work, in which case, it should be a list of the above strings. See descriptions above for what how each method works. By default 'auto.' If remove_method='auto', this is the equivalent of remove_method=['noise threshold', 'antitrigger', 'saturation threshold', 'warm_cool'] processing_window : list, tuple, or None A list/tuple of two items [s, e] or a list/tuple of two-item lists/tuples [[s0, e0], [s1,e1],...[sn, en]] with start and end time(s) for windows to *keep* for processing. Data outside of these times will be excluded from processing. Times should be obspy.UTCDateTime objects to ensure precision, but time strings ("13:05") will also work in most cases (excpetions may be when the data stream starts/ends on different UTC days) sat_percent : float, default=0.995 Percentage (between 0 and 1), to use as the threshold at which to remove data. This is used in the saturation method. By default 0.995. If a value is passed that is greater than 1, it will be divided by 100 to obtain the percentage. noise_percent : float, default = 0.8 Percentage (between 0 and 1), to use as the threshold at which to remove data, if it persists for longer than time (in seconds (specified by min_win_size)). This is used in the noise threshold method. By default 0.8. If a value is passed that is greater than 1, it will be divided by 100 to obtain the percentage. sta : int, optional Short term average (STA) window (in seconds), by default 2. For use with sta/lta antitrigger method. lta : int, optional Long term average (STA) window (in seconds), by default 30. For use with sta/lta antitrigger method. stalta_thresh : list, default=[0.5,5] Two-item list or tuple with the thresholds for the stalta antitrigger. The first value (index [0]) is the lower threshold, the second value (index [1] is the upper threshold), by default [0.5,5] std_ratio_thresh : float, optional The ratio to use as a threshold for removal of noise. The ratio represents the standard deviation value for a rolling window (the size of which is determined by the std_window_size parameter) divided by the standard deviation calculated for the entire trace. This rolling window standard deviation method is similar to the default noise removal method used by the Grilla HVSR software. std_window_size : float, optional The length of the window (in seconds) to use for calculating the rolling/moving standard deviation of a trace for the rolling standard deviation method. min_std_win : float, optional The minimum size of "window" that will be remove using the rolling standard deviation method. warmup_time : int, default=0 Time in seconds to allow for warmup of the instrument (or while operator is still near instrument). This will renove any data before this time, by default 0. cooldown_time : int, default=0 Time in seconds to allow for cooldown of the instrument (or for when operator is nearing instrument). This will renove any data before this time, by default 0. min_win_size : float, default=1 The minumum size a window must be over specified threshold (in seconds) for it to be removed remove_raw_noise : bool, default=False If remove_raw_noise=True, will perform operation on raw data ('input_stream'), rather than potentially already-modified data ('stream'). verbose : bool, default=False Whether to print status of remove_noise Returns ------- output : dict Dictionary similar to hvsr_data, but containing modified data with 'noise' removed """ #Get intput paramaters orig_args = locals().copy() start_time = datetime.datetime.now() # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'remove_noise' in hvsr_data['processing_parameters'].keys(): update_msg = [] for k, v in hvsr_data['processing_parameters']['remove_noise'].items(): defaultVDict = dict(zip(inspect.getfullargspec(remove_noise).args[1:], inspect.getfullargspec(remove_noise).defaults)) # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v remove_method = orig_args['remove_method'] processing_window = orig_args['processing_window'] sat_percent = orig_args['sat_percent'] noise_percent = orig_args['noise_percent'] sta = orig_args['sta'] lta = orig_args['lta'] stalta_thresh = orig_args['stalta_thresh'] warmup_time = orig_args['warmup_time'] cooldown_time = orig_args['cooldown_time'] min_win_size = orig_args['min_win_size'] remove_raw_noise = orig_args['remove_raw_noise'] verbose = orig_args['verbose'] if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']): if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']: pass else: print('\nRemoving noisy data windows (remove_noise())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key=='hvsr_data': pass else: print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in hvsr_data.keys() and 'remove_noise' in hvsr_data['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() # Set up lists manualList = ['manual', 'man', 'm', 'window', 'windows', 'w'] autoList = ['auto', 'automatic', 'all', 'a'] antitrigger = ['stalta', 'anti', 'antitrigger', 'trigger', 'at'] movingstdList = ['moving_std', 'std', 'stdev', 'standard deviation', 'stdev', 'moving_stdev', 'movingstd', 'movingstdev'] saturationThresh = ['saturation threshold', 'sat_thresh', 'sat thresh', 'saturation', 'sat', 's'] noiseThresh = ['noise threshold', 'noise thresh', 'noise_thresh', 'noise', 'threshold', 'n'] warmup_cooldown=['warmup', 'cooldown', 'warm', 'cool', 'buffer', 'warmup-cooldown', 'warmup_cooldown', 'wc', 'warm_cool', 'warm-cool'] procWinList = ['processing_window', 'processing window', 'windows', 'window', 'win', 'pw'] # Do batch runs if isinstance(hvsr_data, HVSRBatch): #If running batch, we'll loop through each site hvsr_out = {} for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_out[site_name] = __remove_noise_batch(**args) #Call another function, that lets us run this function again except Exception as e: hvsr_out[site_name]['processing_status']['remove_noise_status']=False hvsr_out[site_name]['processing_status']['overall_status']=False if verbose: print(e) else: hvsr_data[site_name]['processing_status']['remove_noise_status']=False hvsr_data[site_name]['processing_status']['overall_status']=False hvsr_out = hvsr_data output = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df) return output if not isinstance(hvsr_data, (HVSRData, dict, obspy.Stream, obspy.Trace)): warnings.warn(f"Input of type type(hvsr_data)={type(hvsr_data)} cannot be used.") return hvsr_data # Which stream to use (input, or current) if isinstance(hvsr_data, (HVSRData, dict)): if remove_raw_noise: inStream = hvsr_data['input_stream'].copy() else: inStream = hvsr_data['stream'].copy() output = hvsr_data#.copy() else: inStream = hvsr_data.copy() output = inStream.copy() outStream = inStream # Get remove_method into consistent format (list) if isinstance(remove_method, str): if ',' in remove_method: remove_method = remove_method.split(',') else: remove_method = [remove_method] elif isinstance(remove_method, (list, tuple)): pass elif not remove_method: remove_method=[None] else: warnings.warn(f"Input value remove_method={remove_method} must be either string, list of strings, None, or False. No noise removal will be carried out. Please choose one of the following: 'manual', 'auto', 'antitrigger', 'noise threshold', 'warmup_cooldown'.") return output orig_removeMeth = remove_method # Check if any parameter values are different from default (if they are, automatically add that method to remove_method) rn_signature = inspect.signature(remove_noise) methodDict = {'moving_std': ['std_ratio_thresh', 'std_window_size', 'min_std_win'], 'sat_thresh': ['sat_percent'], 'antitrigger': ['sta', 'lta', 'stalta_thresh', 'show_stalta_plot'], 'noise_thresh': ['noise_percent', 'min_win_size'], 'warmup_cooldown': ['warmup_time', 'cooldown_time'], 'processing_window': ['processing_window']} defaultValDict = {param.name: param.default for param in rn_signature.parameters.values() if param.default is not inspect.Parameter.empty} # If a non-default parameter is specified, add the method it corresponds to to remove_method for key, def_val in defaultValDict.items(): if key in orig_args: if def_val != orig_args[key]: for methodKey, methParamList in methodDict.items(): if key in methParamList: # Add the corresponding method to remove_mehtod if not already if (methodKey not in remove_method) and ('auto' not in remove_method): if remove_method == [None]: remove_method = [methodKey] else: remove_method.append(methodKey) # Reorder list so manual is always first, if it is specified do_manual = False if len(set(remove_method).intersection(manualList)) > 0: do_manual = True manInd = list(set(remove_method).intersection(manualList))[0] remove_method.remove(manInd) remove_method.insert(0, manInd) # Reorder list so auto is always first (if no manual) or second (if manual) # B/c if 'auto' is carried out, no other methods need to be carried out (repetitive) newAutoInd = 0 if do_manual: newAutoInd = 1 if len(set(remove_method).intersection(autoList)) > 0: autoInd = list(set(remove_method).intersection(autoList))[0] remove_method.remove(autoInd) remove_method.insert(newAutoInd, autoInd) #Go through each type of removal and remove if orig_removeMeth != remove_method: if verbose: print(f'\tThe remove_method parameter has been updated because non-default parameter values were detected.') print(f'\tThe remove_method parameter was entered as {orig_removeMeth}, but has been updated to {remove_method}') # REMOVE DATA FROM ANALYSIS for rem_kind in remove_method: try: if not rem_kind: break elif rem_kind.lower() in manualList: if isinstance(output, (HVSRData, dict)): if 'x_windows_out' in output.keys(): pass else: output = _select_windows(output) window_list = output['x_windows_out'] if isinstance(outStream, obspy.core.stream.Stream): if window_list is not None: output['stream'] = __remove_windows(inStream, window_list, warmup_time) else: output = _select_windows(output) elif isinstance(output, (HVSRData, dict)): pass else: RuntimeError("Only obspy.core.stream.Stream data type is currently supported for manual noise removal method.") elif rem_kind.lower() in autoList: outStream = __remove_moving_std(stream=outStream, std_ratio_thresh=std_ratio_thresh, std_window_s=std_window_size, min_win_size=min_std_win, verbose=verbose) outStream = __remove_noise_saturate(outStream, sat_percent=sat_percent, min_win_size=min_win_size, verbose=verbose) # Break for-loop, since all the rest are already done as part of auto break elif rem_kind.lower() in antitrigger: outStream = __remove_anti_stalta(outStream, sta=sta, lta=lta, thresh=stalta_thresh, show_stalta_plot=show_stalta_plot, verbose=verbose) elif rem_kind.lower() in movingstdList: outStream = __remove_moving_std(stream=outStream, std_ratio_thresh=std_ratio_thresh, std_window_s=std_window_size, min_win_size=min_std_win, verbose=verbose) elif rem_kind.lower() in saturationThresh: outStream = __remove_noise_saturate(outStream, sat_percent=sat_percent, min_win_size=min_win_size, verbose=verbose) elif rem_kind.lower() in noiseThresh: outStream = __remove_noise_thresh(outStream, noise_percent=noise_percent, lta=lta, min_win_size=min_win_size, verbose=verbose) elif rem_kind.lower() in warmup_cooldown: outStream = __remove_warmup_cooldown(stream=outStream, warmup_time=warmup_time, cooldown_time=cooldown_time, verbose=verbose) elif rem_kind.lower() in procWinList and str(processing_window).lower() != 'none': outStream = _keep_processing_windows(stream=outStream, processing_window=processing_window, verbose=verbose) else: if len(remove_method)==1: warnings.warn(f"Input value remove_method={remove_method} is not recognized. No noise removal will be carried out. Please choose one of the following: 'manual', 'auto', 'antitrigger', 'noise threshold', 'warmup_cooldown'.") break warnings.warn(f"Input value remove_method={remove_method} is not recognized. Continuing with other noise removal methods.") except Exception as e: print(f'\t *Error with {rem_kind} method. Data was not removed using that method.') print(f'\t *{e}') # Add output if isinstance(output, (HVSRData, dict)): if isinstance(outStream, (obspy.Stream, obspy.Trace)): output['stream_edited'] = outStream else: output['stream_edited'] = outStream['stream'] output['input_stream'] = hvsr_data['input_stream'] if 'processing_parameters' not in output.keys(): output['processing_parameters'] = {} output['processing_parameters']['remove_noise'] = {} for key, value in orig_args.items(): output['processing_parameters']['remove_noise'][key] = value output['processing_status']['remove_noise_status'] = True output = sprit_utils._check_processing_status(output, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) output = __remove_windows_from_df(output, verbose=verbose) #if 'hvsr_windows_df' in output.keys() or ('params' in output.keys() and 'hvsr_windows_df' in output['params'].keys())or ('input_params' in output.keys() and 'hvsr_windows_df' in output['input_params'].keys()): # hvsrDF = output['hvsr_windows_df'] # # outStream = output['stream_edited'].split() # for i, trace in enumerate(outStream): # if i == 0: # trEndTime = trace.stats.endtime # comp_end = trace.stats.component # continue # trStartTime = trace.stats.starttime # comp_start = trace.stats.component # if trEndTime < trStartTime and comp_end == comp_start: # gap = [trEndTime,trStartTime] # output['hvsr_windows_df']['Use'] = (hvsrDF['TimesProcessed_Obspy'].gt(gap[0]) & hvsrDF['TimesProcessed_Obspy'].gt(gap[1]) )| \ # (hvsrDF['TimesProcessed_ObspyEnd'].lt(gap[0]) & hvsrDF['TimesProcessed_ObspyEnd'].lt(gap[1]))# | \ # output['hvsr_windows_df']['Use'] = output['hvsr_windows_df']['Use'].astype(bool) # # trEndTime = trace.stats.endtime # # outStream.merge() # output['stream_edited'] = outStream elif isinstance(hvsr_data, obspy.Stream) or isinstance(hvsr_data, obspy.Trace): output = outStream else: warnings.warn(f"Output of type {type(output)} for this function will likely result in errors in other processing steps. Returning hvsr_data data.") return hvsr_data output = sprit_utils._make_it_classy(output) if 'x_windows_out' not in output.keys(): output['x_windows_out'] = [] return outputFunction to remove noisy windows from data, using various methods.
Methods include - Manual window selection (by clicking on a chart with spectrogram and stream data), - Auto window selection, which does the following two in sequence (these can also be done indepently): - A sta/lta "antitrigger" method (using stalta values to automatically remove triggered windows where there appears to be too much noise) - A noise threshold method, that cuts off all times where the noise threshold equals more than (by default) 80% of the highest amplitude noise sample for the length specified by lta (in seconds) - A saturation threshold method, that cuts off all times where the noise threshold equals more than (by default) 99.5% of the highest amplitude noise sample.
Parameters
hvsr_data:dict, obspy.Stream,orobspy.Trace- Dictionary containing all the data and parameters for the HVSR analysis
remove_method:str, {'auto', 'manual', 'stalta'/'antitrigger', 'saturation threshold', 'noise threshold', 'warmup'/'cooldown'/'buffer'/'warm_cool'}- The different methods for removing noise from the dataset. A list of strings will also work, in which case, it should be a list of the above strings. See descriptions above for what how each method works. By default 'auto.' If remove_method='auto', this is the equivalent of remove_method=['noise threshold', 'antitrigger', 'saturation threshold', 'warm_cool']
processing_window:list, tuple,orNone- A list/tuple of two items [s, e] or a list/tuple of two-item lists/tuples [[s0, e0], [s1,e1],…[sn, en]] with start and end time(s) for windows to keep for processing. Data outside of these times will be excluded from processing. Times should be obspy.UTCDateTime objects to ensure precision, but time strings ("13:05") will also work in most cases (excpetions may be when the data stream starts/ends on different UTC days)
sat_percent:float, default=0.995- Percentage (between 0 and 1), to use as the threshold at which to remove data. This is used in the saturation method. By default 0.995. If a value is passed that is greater than 1, it will be divided by 100 to obtain the percentage.
noise_percent:float, default= 0.8- Percentage (between 0 and 1), to use as the threshold at which to remove data, if it persists for longer than time (in seconds (specified by min_win_size)). This is used in the noise threshold method. By default 0.8. If a value is passed that is greater than 1, it will be divided by 100 to obtain the percentage.
sta:int, optional- Short term average (STA) window (in seconds), by default 2. For use with sta/lta antitrigger method.
lta:int, optional- Long term average (STA) window (in seconds), by default 30. For use with sta/lta antitrigger method.
stalta_thresh:list, default=[0.5,5]- Two-item list or tuple with the thresholds for the stalta antitrigger. The first value (index [0]) is the lower threshold, the second value (index [1] is the upper threshold), by default [0.5,5]
std_ratio_thresh:float, optional- The ratio to use as a threshold for removal of noise. The ratio represents the standard deviation value for a rolling window (the size of which is determined by the std_window_size parameter) divided by the standard deviation calculated for the entire trace. This rolling window standard deviation method is similar to the default noise removal method used by the Grilla HVSR software.
std_window_size:float, optional- The length of the window (in seconds) to use for calculating the rolling/moving standard deviation of a trace for the rolling standard deviation method.
min_std_win:float, optional- The minimum size of "window" that will be remove using the rolling standard deviation method.
warmup_time:int, default=0- Time in seconds to allow for warmup of the instrument (or while operator is still near instrument). This will renove any data before this time, by default 0.
cooldown_time:int, default=0- Time in seconds to allow for cooldown of the instrument (or for when operator is nearing instrument). This will renove any data before this time, by default 0.
min_win_size:float, default=1- The minumum size a window must be over specified threshold (in seconds) for it to be removed
remove_raw_noise:bool, default=False- If remove_raw_noise=True, will perform operation on raw data ('input_stream'), rather than potentially already-modified data ('stream').
verbose:bool, default=False- Whether to print status of remove_noise
Returns
output:dict- Dictionary similar to hvsr_data, but containing modified data with 'noise' removed
def remove_outlier_curves(hvsr_data,
outlier_method='prototype',
outlier_threshold=50,
use_percentile=True,
min_pts=5,
use_hv_curves=False,
plot_engine='matplotlib',
show_outlier_plot=False,
generate_outlier_plot=True,
verbose=False,
**kwargs)-
Expand source code
def remove_outlier_curves(hvsr_data, outlier_method='prototype', outlier_threshold=50, use_percentile=True, min_pts=5, use_hv_curves=False, plot_engine='matplotlib', show_outlier_plot=False, generate_outlier_plot=True, verbose=False, **kwargs): """Function used to remove outliers curves using a "prototype" or "dbscan" method. Prototype method calculates a prototype curve (i.e., median) and calculates the distance of the H/V or PSD curve from each window from that prototype curve. Currently, Root Mean Square Error is used to calculate the distance for each windowed H/V or PSD curve at each frequency step for all times. It calculates the RMSE for the PPSD curves of each component individually. All curves are removed from analysis. DBSCAN uses the DBSCAN method, outlier_threshold being by default the percentile value of distances of all curves from all other curves. Distance is calculated using scipy.spatial.distance.pdist, by default with 'euclidean' distance. The `min_pts` parameter specifies the minimum number of curves whose distance must be within the threshold distance percentile/value to be retained. Some abberant curves often occur due to the remove_noise() function, so this should be run some time after remove_noise(). In general, the recommended workflow is to run this immediately following the `generate_psds()` function. or if use_hv_curves=True, after `process_hvsr()`. Parameters ---------- hvsr_data : dict Input dictionary containing all the values and parameters of interest outlier_method : str, default='prototype' The method to use for outlier detection. Currently, 'dbscan' and 'prototype' is supported. outlier_threshold : float or int, default=98 The Root Mean Square Error value to use as a threshold for determining whether a curve is an outlier. This averages over each individual entire curve so that curves with very abberant data (often occurs when using the remove_noise() method), can be identified. Otherwise, specify a float or integer to use as the cutoff RMSE value (all curves with RMSE above will be removed) use_percentile : float, default=True Whether outlier_threshold should be interepreted as a raw RMSE value or as a percentile of the RMSE values. min_pts : int, default=5 The minimum number of points to use for the outlier detection method. This is only used if outlier_method='dbscan' This is minimum number of points a point needs in its neighborhood to not be considered an outlier. use_hv_curves : bool, default=False Whether to use the calculated HV Curve or the individual components. This can only be True after process_hvsr() has been run. show_plot : bool, default=False Whether to show a plot of the removed data verbose : bool, default=False Whether to print output of function to terminal Returns ------- hvsr_data : dict Input dictionary with values modified based on work of function. SEE ALSO -------- [scipy.spatial.distance.pdist](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html#scipy.spatial.distance.pdist) """ # Setup function #Get intput paramaters orig_args = locals().copy() start_time = datetime.datetime.now() # Update with processing parameters specified previously in input_params, if applicable if 'processing_parameters' in hvsr_data.keys(): if 'remove_outlier_curves' in hvsr_data['processing_parameters'].keys() and 'remove_noise' in hvsr_data['processing_parameters'].keys(): update_msg = [] for k, v in hvsr_data['processing_parameters']['remove_noise'].items(): defaultVDict = dict(zip(inspect.getfullargspec(remove_outlier_curves).args[1:], inspect.getfullargspec(remove_outlier_curves).defaults)) # Manual input to function overrides the imported parameter values if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]): update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})') orig_args[k] = v # Reset parameters in case of manual override of imported parameters outlier_method = orig_args['outlier_method'] outlier_threshold = orig_args['outlier_threshold'] use_percentile = orig_args['use_percentile'] min_pts = orig_args['min_pts'] use_hv_curves = orig_args['use_hv_curves'] plot_engine = orig_args['plot_engine'] show_outlier_plot = orig_args['show_outlier_plot'] generate_outlier_plot = orig_args['generate_outlier_plot'] verbose = orig_args['verbose'] # Allow skipping step if outlier_method specified as None (may help GUIs) if str(outlier_method).lower() == 'none' or outlier_method is None: return hvsr_data #Print if verbose, which changes depending on if batch data or not if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']): if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']: pass else: print('\nRemoving outlier curves from further analysis (remove_outlier_curves())') print('\tUsing the following parameters:') for key, value in orig_args.items(): if key == 'hvsr_data': pass else: print('\t {}={}'.format(key, value)) print() if 'processing_parameters' in hvsr_data.keys() and 'remove_outlier_curves' in hvsr_data['processing_parameters'].keys(): if update_msg != []: update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:') for msg_line in update_msg: print(msg_line) print() #First, divide up for batch or not #Site is in the keys anytime it's not batch if isinstance(hvsr_data, HVSRBatch): #If running batch, we'll loop through each site hvsr_out = {} for site_name in hvsr_data.keys(): args = orig_args.copy() #Make a copy so we don't accidentally overwrite args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site if hvsr_data[site_name]['processing_status']['overall_status']: try: hvsr_out[site_name] = __remove_outlier_curves(**args) #Call another function, that lets us run this function again except: hvsr_out = hvsr_data hvsr_out[site_name]['processing_status']['remove_outlier_curves_status'] = False hvsr_out[site_name]['processing_status']['overall_status'] = False else: hvsr_out = hvsr_data hvsr_out[site_name]['processing_status']['remove_outlier_curves_status'] = False hvsr_out[site_name]['processing_status']['overall_status'] = False hvsr_out = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df) hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_out dbscanList = ['dbscan', 'distance', 'dist', 'dbs', 'db', 'd'] prototypeList = ['prototype', 'proto', 'ptype', 'p', 'root mean square', 'root mean square error', 'rms', 'rmse', 'r'] # Determine names of hvsr_windows_df columns to use if not use_hv_curves: compNames = ['Z', 'E', 'N'] for col_name in hvsr_data['hvsr_windows_df'].columns: if 'psd_values' in col_name and 'RMSE' not in col_name: cName = col_name.split('_')[2] if cName not in compNames: compNames.append(cName) col_prefix = 'psd_values_' colNames = [col_prefix+cn for cn in compNames] else: compNames = [] for col_name in hvsr_data['hvsr_windows_df'].columns: if col_name.startswith('HV_Curves') and "Log10" not in col_name: compNames.append(col_name) colNames = compNames col_prefix = 'HV_Curves' # Remove outlier depending on method, prototype as default if nothing else specified if str(outlier_method).lower() == 'none' or outlier_method is None: # Skip all outlier removal if 'processing_parameters' not in hvsr_out.keys(): hvsr_out['processing_parameters'] = {} hvsr_out['processing_parameters']['remove_outlier_curves'] = {} exclude_params_list = ['hvsr_data'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_out['processing_parameters']['remove_outlier_curves'][key] = value hvsr_out['processing_status']['remove_outlier_curves_status'] = None hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_out elif str(outlier_method).lower() in dbscanList: hvsr_out = __dbscan_outlier_detect(hvsr_data=hvsr_data, use_hv_curves=use_hv_curves, use_percentile=use_percentile, neighborhood_size=outlier_threshold, dist_metric='euclidean', min_neighborhood_pts=min_pts, col_names=colNames, comp_names=compNames, col_prefix=col_prefix, verbose=verbose) elif str(outlier_method).lower() in prototypeList: hvsr_out = __prototype_outlier_detect(hvsr_data, use_hv_curves=use_hv_curves, use_percentile=use_percentile, outlier_threshold=outlier_threshold, col_names=colNames, comp_names=compNames, col_prefix=col_prefix, verbose=verbose) else: hvsr_out = __prototype_outlier_detect(hvsr_data, use_hv_curves=use_hv_curves, use_percentile=use_percentile, outlier_threshold=outlier_threshold, col_names=colNames, comp_names=compNames, col_prefix=col_prefix, verbose=verbose) # Show plot of removed/retained data if plot_engine.lower() == 'matplotlib' and (generate_outlier_plot or show_outlier_plot): hvsr_data['Outlier_Plot'] = sprit_plot.plot_outlier_curves(hvsr_data, outlier_threshold=outlier_threshold, use_percentile=use_percentile, use_hv_curves=use_hv_curves, plot_engine='matplotlib', show_plot=show_outlier_plot, verbose=verbose) elif plot_engine.lower() == 'plotly' and (generate_outlier_plot or show_outlier_plot): hvsr_data['Outlier_Plot'] = sprit_plot.plot_outlier_curves(hvsr_data, outlier_threshold=outlier_threshold, use_percentile=use_percentile, use_hv_curves=use_hv_curves, plot_engine='plotly', from_roc=True, show_plot=show_outlier_plot, verbose=verbose) else: pass if 'processing_parameters' not in hvsr_out.keys(): hvsr_out['processing_parameters'] = {} hvsr_out['processing_parameters']['remove_outlier_curves'] = {} exclude_params_list = ['hvsr_data'] for key, value in orig_args.items(): if key not in exclude_params_list: hvsr_out['processing_parameters']['remove_outlier_curves'][key] = value hvsr_out['processing_status']['remove_outlier_curves_status'] = True hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose) return hvsr_outFunction used to remove outliers curves using a "prototype" or "dbscan" method. Prototype method calculates a prototype curve (i.e., median) and calculates the distance of the H/V or PSD curve from each window from that prototype curve. Currently, Root Mean Square Error is used to calculate the distance for each windowed H/V or PSD curve at each frequency step for all times. It calculates the RMSE for the PPSD curves of each component individually. All curves are removed from analysis.
DBSCAN uses the DBSCAN method, outlier_threshold being by default the percentile value of distances of all curves from all other curves. Distance is calculated using scipy.spatial.distance.pdist, by default with 'euclidean' distance. The
min_ptsparameter specifies the minimum number of curves whose distance must be within the threshold distance percentile/value to be retained.Some abberant curves often occur due to the remove_noise() function, so this should be run some time after remove_noise(). In general, the recommended workflow is to run this immediately following the
generate_psds()function. or if use_hv_curves=True, afterprocess_hvsr().Parameters
hvsr_data:dict- Input dictionary containing all the values and parameters of interest
outlier_method:str, default='prototype'- The method to use for outlier detection. Currently, 'dbscan' and 'prototype' is supported.
outlier_threshold:floatorint, default=98- The Root Mean Square Error value to use as a threshold for determining whether a curve is an outlier. This averages over each individual entire curve so that curves with very abberant data (often occurs when using the remove_noise() method), can be identified. Otherwise, specify a float or integer to use as the cutoff RMSE value (all curves with RMSE above will be removed)
use_percentile:float, default=True- Whether outlier_threshold should be interepreted as a raw RMSE value or as a percentile of the RMSE values.
min_pts:int, default=5- The minimum number of points to use for the outlier detection method. This is only used if outlier_method='dbscan' This is minimum number of points a point needs in its neighborhood to not be considered an outlier.
use_hv_curves:bool, default=False- Whether to use the calculated HV Curve or the individual components. This can only be True after process_hvsr() has been run.
show_plot:bool, default=False- Whether to show a plot of the removed data
verbose:bool, default=False- Whether to print output of function to terminal
Returns
hvsr_data:dict- Input dictionary with values modified based on work of function.
See Also
[scipy.spatial.distance.pdist](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html#scipy.spatial.distance.pdist) def run(input_data=None,
source='file',
azimuth_calculation=False,
noise_removal=False,
outlier_curves_removal=False,
skip_steps=None,
verbose=False,
**kwargs)-
Expand source code
def run(input_data=None, source='file', azimuth_calculation=False, noise_removal=False, outlier_curves_removal=False, skip_steps=None, verbose=False, **kwargs): """The sprit.run() is the main function that allows you to do all your HVSR processing in one simple step (sprit.run() is how you would call it in your code, but it may also be called using sprit.sprit_hvsr.run()) The input_data parameter of sprit.run() is the only required parameter (if nothing entered, it will run sample data). This can be either a single file, a list of files (one for each component, for example), a directory (in which case, all obspy-readable files will be added to an HVSRBatch instance), a Rasp. Shake raw data directory, or sample data. Notes ----- The sprit.run() function calls the following functions. This is the recommended order/set of functions to run to process HVSR using SpRIT. See the API documentation for these functions for more information: - input_params(): The input_data parameter of input_params() is the only required variable, though others may also need to be called for your data to process correctly. - fetch_data(): the source parameter of fetch_data() is the only explicit variable in the sprit.run() function aside from input_data and verbose. Everything else gets delivered to the correct function via the kwargs dictionary - remove_noise(): by default, the kind of noise removal is remove_method='auto'. See the remove_noise() documentation for more information. If remove_method is set to anything other than one of the explicit options in remove_noise, noise removal will not be carried out. - calculate_azimuth(): calculate one or several azimuths. Single azimuth can be a way to combine H components too. - generate_psds(): generates psds for each component, which will be combined/used later. Any parameter of obspy.signal.spectral_estimation.PPSD() may also be read into this function. - remove_outlier_curves(): removes any outlier ppsd curves so that the data quality for when curves are combined will be enhanced. See the remove_outlier_curves() documentation for more information. - process_hvsr(): this is the main function processing the hvsr curve and statistics. See process_hvsr() documentation for more details. The hvsr_band parameter sets the frequency spectrum over which these calculations occur. - check_peaks(): this is the main function that will find and 'score' peaks to get a best peak. The parameter peak_freq_range can be set to limit the frequencies within which peaks are checked and scored. - get_report(): this is the main function that will print, plot, and/or save the results of the data. See the get_report() API documentation for more information. - export_hvsr(): this function exports the final data output as a pickle file (by default, this pickle object has a .hvsr extension). This can be used to read data back into SpRIT without having to reprocess data. Parameters ---------- input_data : str or filepath object that can be read by obspy Filepath to data to be processed. This may be a file or directory, depending on what kind of data is being processed (this can be specified with the source parameter). For sample data, The following can be specified as the input_data parameter: - Any integer 1-6 (inclusive), or the string (e.g., input_data="1" or input_data=1 will work) - The word "sample" before any integer (e.g., input_data="sample1") - The word "sample" will default to "sample1" if source='file'. - If source='batch', input_data should be input_data='sample' or input_data='batch'. In this case, it will read and process all the sample files using the HVSRBatch class. Set verbose=True to see all the information in the sample batch csv file. source : str, optional _description_, by default 'file' azimuth_calculation : bool, optional Whether to perform azimuthal analysis, by default False. noise_removal : bool, default=False Whether to remove noise (before processing PPSDs) outlier_curves_removal : bool, default=False Whether to remove outlier curves from HVSR time windows skip_steps : list, str, or None A list of function names to skip (as strings), to manually prevent any function from being performed. For example, skip_steps=["input_params", "fetch_data"] will prevent sprit.input_params() and sprit.fetch_data() from being called in sprit.run(). show_plot : bool, default=True Whether to show plots. This does not affect whether the plots are created (and then inserted as an attribute of HVSRData), only whether they are shown. verbose : bool, optional _description_, by default False **kwargs Keyword arguments for the functions listed above. The keyword arguments are unique, so they will get parsed out and passed into the appropriate function. Returns ------- hvsr_results : sprit.HVSRData or sprit.HVSRBatch object If a single file/data point is being processed, a HVSRData object will be returned. Otherwise, it will be a HVSRBatch object. See their documention for more information. See Also -------- input_params fetch_data remove_noise calculate_azimuth generate_psds remove_outlier_curves process_hvsr check_peaks get_report export_hvsr Raises ------ RuntimeError If the input parameter may not be read correctly. This is raised if the input_params() function fails. This raises an error since no other data processing or reading steps will be able to carried out correctly. RuntimeError If the data is not read/fetched correctly using fetch_data(), an error will be raised. This is raised if the fetch_data() function fails. This raises an error since no other data processing steps will be able to carried out correctly. RuntimeError If the data being processed is a single file, an error will be raised if generate_psds() does not work correctly. No errors are raised for remove_noise() errors (since that is an optional step) and the process_hvsr() step (since that is the last processing step) . """ if input_data is None or input_data == '': print("********************* PROCESSING SAMPLE DATA *****************************************") print("To read in your own data, use sprit.run(input_data='/path/to/your/seismic/data.mseed')") print("See SpRIT Wiki or API documentation for more information:") print("\t Wiki: https://github.com/RJbalikian/SPRIT-HVSR/wiki") print("\t API Documentation: https://sprit.readthedocs.io/en/latest/#") print("**************************************************************************************") print() input_data = 'sample' orig_args = locals().copy() # Get the initial arguments global do_run do_run = True if verbose: print('Using sprit.run() with the following parameters:') print(f'\tinput_data = {input_data}') print(f'\tazimuth_calculation = {azimuth_calculation}') print(f'\tnoise_removal = {noise_removal}') print(f'\toutlier_curves_removal = {outlier_curves_removal}') print("\tWith the following kwargs: ", end='') if kwargs is not {}: print() for k, v in kwargs.items(): print(f"\t\t{k} = {v}") else: print("{None}") print() if 'hvsr_band' not in kwargs.keys(): kwargs['hvsr_band'] = inspect.signature(input_params).parameters['hvsr_band'].default if 'peak_freq_range' not in kwargs.keys(): kwargs['peak_freq_range'] = inspect.signature(input_params).parameters['peak_freq_range'].default if 'processing_parameters' not in kwargs.keys(): kwargs['processing_parameters'] = {} # Separate out input_params and fetch_data processes based on whether batch has been specified batchlist = ['batch', 'bach', 'bath', 'b'] if str(source).lower() in batchlist and str('input_data').lower() not in SAMPLE_LIST: try: batch_data_read_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(batch_data_read).parameters.keys())} hvsrDataIN = batch_data_read(batch_data=input_data, verbose=verbose, **batch_data_read_kwargs) except Exception as e: raise RuntimeError(f'Batch data read in was not successful:\n{e}') else: # Get the input parameters params = input_data try: input_params_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(input_params).parameters.keys())} if 'acq_date' not in input_params_kwargs: input_params_kwargs['acq_date'] = NOWTIME.date() if 'starttime' not in input_params_kwargs: input_params_kwargs['starttime'] = NOWTIME.time() if skip_steps is None or 'input_params' not in skip_steps: params = input_params(input_data=input_data, verbose=verbose, **input_params_kwargs) except Exception as e: if hasattr(e, 'message'): errMsg = e.message else: errMsg = e print(f"ERROR during input_params(): {errMsg}") # Even if batch, this is reading in data for all sites so we want to raise error, not just warn raise RuntimeError('Input parameters not read correctly, see sprit.input_params() function and parameters') # If input_params fails, initialize params as an HVSRDATA #params = {'processing_status':{'input_params_status':False, 'overall_status':False}} #params.update(input_params_kwargs) #params = sprit_utils._make_it_classy(params) # Fetch Data hvsrDataIN = params try: fetch_data_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(fetch_data).parameters.keys())} fetch_data_kwargs.update({k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())}) if 'obspy_ppsds' in kwargs: fetch_data_kwargs['obspy_ppsds'] = kwargs['obspy_ppsds'] else: fetch_data_kwargs['obspy_ppsds'] = False if skip_steps is None or 'fetch_data' not in skip_steps: hvsrDataIN = fetch_data(params=params, source=source, verbose=verbose, **fetch_data_kwargs) except Exception as e: # Even if batch, this is reading in data for all sites so we want to raise error, not just warn if hasattr(e, 'message'): errMsg = e.message else: errMsg = e print(f"ERROR during fetch_data(): {errMsg}") raise RuntimeError('Data not read correctly, see sprit.fetch_data() function and parameters for more details.') # BREAK OUT FOR BATCH PROCESSING run_kwargs_for_df = [] if isinstance(hvsrDataIN, HVSRBatch): # Create dictionary that will be used to create HVSRBatch object hvsrBatchDict = {} # Loop through each site and run sprit.run() for each HVSRData object for site_name, site_data in hvsrDataIN.items(): run_kwargs = {} #orig_args.copy() # Make a copy so we don't accidentally overwrite print(f'\n\n**PROCESSING DATA FOR SITE {site_name.upper()}**\n') run_kwargs['input_data'] = site_data # Update run kwargs # First, get processing_parameters per site for funname, fundict in site_data['processing_parameters'].items(): for funk, funv in fundict.items(): run_kwargs[funk] = funv # Overwrite per-site processing parameters with params passed to sprit.run() as kwargs for paramname, paramval in kwargs.items(): if paramname != 'source': # Don't update source for batch data run_kwargs[paramname] = paramval dont_update_these_args = ['input_data', 'source', 'kwargs'] # Overwrite per-site processing parameters with sprit.run() run_args = orig_args.copy() for k, v in run_args.items(): if k not in dont_update_these_args: if v != inspect.signature(run).parameters[k].default: run_kwargs[k] = v try: hvsrBatchDict[site_name] = run(**run_kwargs) run_kwargs_for_df.append(run_kwargs) except Exception as e: hvsrBatchDict[site_name] = site_data hvsrBatchDict[site_name]['Error_Message'] = sprit_utils._get_error_from_exception(e, print_error_message=False, return_error_message=True) if verbose: sprit_utils._get_error_from_exception(e) print(f"Error processing site {site_name}. Continuing processing of remaining sites.") hvsrBatchDict[site_name]['processing_status']['generate_psds_status'] = False hvsrBatchDict[site_name]['processing_status']['overall_status'] = False # Create batch object hvsrBatchData = HVSRBatch(hvsrBatchDict, df_as_read=pd.DataFrame(run_kwargs_for_df)) # Use batch object to get Output Table with all data, including results and inputs for s, site in enumerate(hvsrBatchData): if hasattr(hvsrBatchData[site], 'Table_Report'): if s == 0: table_reports = hvsrBatchData[site].Table_Report else: table_reports = pd.concat([table_reports, hvsrBatchData[site].Table_Report]) else: if s == 0: table_reports = pd.DataFrame() hvsrBatchData['Table_Report'] = pd.merge(left=hvsrBatchData.input_df, right=table_reports, how='outer', left_on='site', right_on='Site Name') return hvsrBatchData # Calculate azimuths hvsr_az = hvsrDataIN azimuth_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(calculate_azimuth).parameters.keys())} azList = ['azimuth', 'single azimuth', 'single'] azCond1 = 'horizontal_method' in kwargs.keys() and str(kwargs['horizontal_method']) == '8' azCond2 = 'horizontal_method' in kwargs.keys() and str(kwargs['horizontal_method']).lower() in azList azCond3 = azimuth_calculation azCond4 = len(azimuth_kwargs.keys()) > 0 if (azCond1 or azCond2 or azCond3 or azCond4) and (skip_steps is None or 'calculate_azimuth' not in skip_steps): azimuth_calculation = True azimuth_kwargs['azimuth_type'] = kwargs['azimuth_type'] = 'single' if 'azimuth_angle' not in kwargs.keys(): azimuth_kwargs['azimuth_angle'] = kwargs['azimuth_angle'] = 45 kwargs['azimuth'] = "R" # str(kwargs['azimuth_angle']).zfill(3) if 'horizontal_method' not in kwargs.keys(): kwargs['horizontal_method'] = 'Single Azimuth' try: hvsr_az = calculate_azimuth(hvsrDataIN, verbose=verbose, **azimuth_kwargs) except Exception as e: if hasattr(e, 'message'): errMsg = e.message else: errMsg = e print(f"Error during calculate_azimuth() for {hvsr_az.site}: \n{errMsg}") if isinstance(hvsr_az, HVSRBatch): for site_name in hvsr_az.keys(): hvsr_az[site_name]['processing_status']['calculate_azimuths_status'] = False else: hvsr_az['processing_status']['calculate_azimuths_status'] = False else: azimuth_calculation = False # Remove Noise data_noiseRemoved = hvsr_az try: remove_noise_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_noise).parameters.keys())} if noise_removal or remove_noise_kwargs != {}: remove_noise_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_noise).parameters.keys())} try: if skip_steps is None or 'remove_noise' not in skip_steps: data_noiseRemoved = remove_noise(hvsr_data=data_noiseRemoved, verbose=verbose, **remove_noise_kwargs) except Exception as e: if hasattr(e, 'message'): errMsg = e.message else: errMsg = e print(f"Error with remove_noise for site {data_noiseRemoved.site}: {errMsg}") # Mark that remove_noise failed # Reformat data so HVSRData and HVSRBatch data both work here if isinstance(data_noiseRemoved, HVSRData): data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved} data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved} for site_name in data_noiseRemoved.keys(): data_noiseRemoved[site_name]['processing_status']['remove_noise_status'] = False # Since noise removal is not required for data processing, check others first if data_noiseRemoved[site_name]['processing_status']['overall_status']: data_noiseRemoved[site_name]['processing_status']['overall_status'] = True else: data_noiseRemoved[site_name]['processing_status']['overall_status'] = False # If it wasn't originally HVSRBatch, make it HVSRData object again if not data_noiseRemoved[site_name]['batch']: data_noiseRemoved = data_noiseRemoved[site_name] else: if isinstance(data_noiseRemoved, HVSRData): data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved} for site_name in data_noiseRemoved.keys(): # This should work more or less the same for batch and regular data now data_noiseRemoved[site_name]['stream_edited'] = data_noiseRemoved[site_name]['stream'] data_noiseRemoved[site_name]['processing_status']['remove_noise_status'] = None # If it wasn't originally HVSRBatch, make it HVSRData object again #if not data_noiseRemoved[site_name]['batch']: data_noiseRemoved = data_noiseRemoved[site_name] except Exception as e: if (source == 'file' or source == 'raw'): if hasattr(e, 'message'): errMsg = e.message else: errMsg = e if not ('batch' in data_noiseRemoved.keys() and data_noiseRemoved['batch']): raise RuntimeError(f"generate_psds() error: {errMsg}") # Generate PPSDs psd_data = data_noiseRemoved try: generate_psds_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(generate_psds).parameters.keys())} PPSDkwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(PPSD).parameters.keys())} generate_psds_kwargs.update(PPSDkwargs) generate_psds_kwargs['azimuthal_psds'] = azimuth_calculation if skip_steps is None or ('generate_psds' not in skip_steps and 'generate_ppsds' not in skip_steps): psd_data = generate_psds(hvsr_data=psd_data, verbose=verbose, **generate_psds_kwargs) except Exception as e: if hasattr(e, 'message'): errMsg = e.message else: errMsg = e if verbose: print(f"Error during generate_psds() for {site_name}: \n{errMsg}") if (source == 'file' or source == 'raw'): raise RuntimeError(f"generate_psds() error: \n{errMsg}") # Reformat data so HVSRData and HVSRBatch data both work here if isinstance(psd_data, HVSRData): psd_data = {psd_data['site']: psd_data} for site_name in psd_data.keys(): # This should work more or less the same for batch and regular data now psd_data[site_name]['processing_status']['generate_psds_status'] = False psd_data[site_name]['processing_status']['overall_status'] = False #If it wasn't originally HVSRBatch, make it HVSRData object again if not psd_data[site_name]['batch']: psd_data = psd_data[site_name] # Remove Outlier PSD Curves data_curvesRemoved = psd_data try: remove_outlier_curve_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_outlier_curves).parameters.keys())} if len(remove_outlier_curve_kwargs.keys())==1 and 'plot_engine' in remove_outlier_curve_kwargs.keys(): remove_outlier_curve_kwargs = {} if 'use_hv_curves' not in remove_outlier_curve_kwargs.keys(): use_hv_curves = False else: use_hv_curves = remove_outlier_curve_kwargs['use_hv_curves'] # Check whether it is indicated to remove outlier curves outlier_curve_keys_used = True if remove_outlier_curve_kwargs == {} or list(remove_outlier_curve_kwargs.keys()) == ['show_plot']: outlier_curve_keys_used = False if (outlier_curves_removal or outlier_curve_keys_used) and not use_hv_curves and (skip_steps is None or 'remove_outlier_curves' not in skip_steps): remove_outlier_curve_kwargs['remove_outliers_during_plot'] = False data_curvesRemoved = remove_outlier_curves(hvsr_data=data_curvesRemoved, verbose=verbose,**remove_outlier_curve_kwargs) except Exception as e: traceback.print_exception(sys.exc_info()[1]) exc_type, exc_obj, tb = sys.exc_info() f = tb.tb_frame lineno = tb.tb_lineno filename = f.f_code.co_filename errLineNo = str(traceback.extract_tb(sys.exc_info()[2])[-1].lineno) error_category = type(e).__name__.title().replace('error', 'Error') error_message = f"{e} ({errLineNo})" print(f"{error_category} ({errLineNo}): {error_message}") print(lineno, filename, f) # Reformat data so HVSRData and HVSRBatch data both work here if isinstance(data_curvesRemoved, HVSRData): data_curvesRemoved_interim = {data_curvesRemoved['site']: data_curvesRemoved} else: data_curvesRemoved_interim = data_curvesRemoved for site_name in data_curvesRemoved_interim.keys(): # This should work more or less the same for batch and regular data now data_curvesRemoved_interim[site_name]['processing_status']['remove_outlier_curves_status'] = False #data_curvesRemoved_interim[site_name]['processing_status']['overall_status'] = False #If it wasn't originally HVSRBatch, make it HVSRData object again if not data_curvesRemoved_interim[site_name]['batch']: data_curvesRemoved_interim = data_curvesRemoved_interim[site_name] data_curvesRemoved = data_curvesRemoved_interim # Process HVSR Curves hvsr_results = data_curvesRemoved try: process_hvsr_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(process_hvsr).parameters.keys())} if azimuth_calculation: if azimuth_kwargs['azimuth_type'] == 'single': process_hvsr_kwargs['azimuth'] = azimuth_kwargs['azimuth_angle'] if skip_steps is None or 'process_hvsr' not in skip_steps: hvsr_results = process_hvsr(hvsr_data=psd_data, verbose=verbose, **process_hvsr_kwargs) except Exception as e: sprit_utils._get_error_from_exception(e, print_error_message=True) if isinstance(hvsr_results, HVSRData): hvsr_results = {hvsr_results['site']: hvsr_results} for site_name in hvsr_results.keys(): # This should work more or less the same for batch and regular data now hvsr_results[site_name]['processing_status']['process_hvsr_status']=False hvsr_results[site_name]['processing_status']['overall_status'] = False # If it wasn't originally HVSRBatch, make it HVSRData object again if not hvsr_results[site_name]['batch']: hvsr_results = hvsr_results[site_name] # Remove outlier HV Curves try: remove_outlier_curve_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_outlier_curves).parameters.keys())} if 'use_hv_curves' not in remove_outlier_curve_kwargs.keys(): use_hv_curves = False else: use_hv_curves = remove_outlier_curve_kwargs['use_hv_curves'] # Check whether it is indicated to remove outlier curves outlier_curve_keys_used = True if remove_outlier_curve_kwargs == {} or list(remove_outlier_curve_kwargs.keys()) == ['show_plot']: outlier_curve_keys_used = False if (outlier_curves_removal or outlier_curve_keys_used) and use_hv_curves and (skip_steps is None or 'remove_outlier_curves' not in skip_steps): remove_outlier_curve_kwargs['remove_outliers_during_plot'] = False hvsr_results = remove_outlier_curves(hvsr_data=hvsr_results, verbose=verbose,**remove_outlier_curve_kwargs) except Exception as e: traceback.print_exception(sys.exc_info()[1]) exc_type, exc_obj, tb = sys.exc_info() f = tb.tb_frame lineno = tb.tb_lineno filename = f.f_code.co_filename errLineNo = str(traceback.extract_tb(sys.exc_info()[2])[-1].lineno) error_category = type(e).__name__.title().replace('error', 'Error') error_message = f"{e} ({errLineNo})" print(f"{error_category} ({errLineNo}): {error_message}") print(lineno, filename, f) # Reformat data so HVSRData and HVSRBatch data both work here if isinstance(hvsr_results, HVSRData): data_curvesRemoved_interim = {hvsr_results['site']: hvsr_results} else: data_curvesRemoved_interim = hvsr_results for site_name in data_curvesRemoved_interim.keys(): # This should work more or less the same for batch and regular data now data_curvesRemoved_interim[site_name]['processing_status']['remove_outlier_curves_status'] = False #data_curvesRemoved_interim[site_name]['processing_status']['overall_status'] = False #If it wasn't originally HVSRBatch, make it HVSRData object again if not data_curvesRemoved_interim[site_name]['batch']: data_curvesRemoved_interim = data_curvesRemoved_interim[site_name] hvsr_results = data_curvesRemoved_interim # Final post-processing/reporting # Check peaks check_peaks_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(check_peaks).parameters.keys())} if skip_steps is None or 'check_peaks' not in skip_steps: hvsr_results = check_peaks(hvsr_data=hvsr_results, verbose=verbose, **check_peaks_kwargs) get_report_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(get_report).parameters.keys())} # Add 'az' as a default plot if the following conditions # first check if report_formats is specified, if not, add default value if 'report_formats' not in get_report_kwargs.keys(): get_report_kwargs['report_formats'] = inspect.signature(get_report).parameters['report_formats'].default # Now, check if plot is specified, then if plot_type is specified, then add 'az' if stream has azimuths if 'plot' in get_report_kwargs['report_formats']: plot_hvsr_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(plot_hvsr).parameters.keys())} get_report_kwargs.update(plot_hvsr_kwargs) usingDefault = True if 'plot_type' not in get_report_kwargs.keys(): get_report_kwargs['plot_type'] = inspect.signature(get_report).parameters['plot_type'].default else: usingDefault = False # Check if az is already specified as plot output azList = ['azimuth', 'az', 'a', 'radial', 'r'] az_requested = False get_report_kwargs['plot_type'] = [item.lower() for item in get_report_kwargs['plot_type'].split(' ')] for azStr in azList: if azStr.lower() in get_report_kwargs['plot_type']: az_requested = True break get_report_kwargs['plot_type'] = ' '.join(get_report_kwargs['plot_type']) if isinstance(hvsr_results, HVSRData): hvsr_results_interim = {hvsr_results['site']: hvsr_results} else: hvsr_results_interim = hvsr_results for site_name in hvsr_results_interim.keys(): # This should work more or less the same for batch and regular data now # Check if data has azimuth data hasAz = False if 'stream' in hvsr_results_interim[site_name].keys(): for tr in hvsr_results_interim[site_name]['stream']: if tr.stats.component == 'R': hasAz = True break # Assuming all sites in batch have az if one does if hasAz: break # If it wasn't originally HVSRBatch, make it HVSRData object again #if not hvsr_results_interim[site_name]['batch']: # hvsr_results_interim = hvsr_results_interim[site_name] # Add azimuth as a requested plot if azimuthal data exists but not requested in plot if not az_requested and hasAz and hvsr_results.horizontal_method != 'Single Azimuth': get_report_kwargs['plot_type'] = get_report_kwargs['plot_type'] + ' az' if skip_steps is None or ('get_report' not in skip_steps and 'report' not in skip_steps): hvsr_results = get_report(hvsr_results=hvsr_results, verbose=verbose, **get_report_kwargs) if verbose: if 'report_formats' in get_report_kwargs.keys(): if type(get_report_kwargs['report_formats']) is str: report_formats = get_report_kwargs['report_formats'].lower() elif isinstance(get_report_kwargs['report_formats'], (tuple, list)): for i, rf in enumerate(get_report_kwargs['report_formats']): get_report_kwargs['report_formats'][i] = rf.lower() # if report_formats is 'print', we would have already printed it in previous step if get_report_kwargs['report_formats'] == 'print' or 'print' in get_report_kwargs['report_formats'] or isinstance(hvsr_results, HVSRBatch): # We do not need to print another report if already printed to terminal pass else: # We will just change the report_formats kwarg to print, since we already got the originally intended report format above, # now need to print for verbose output get_report_kwargs['report_formats'] = 'print' get_report(hvsr_results=hvsr_results, **get_report_kwargs) if get_report_kwargs['report_formats'] == 'plot' or 'plot' in get_report_kwargs['report_formats']: # We do not need to plot another report if already plotted pass else: # hvplot_kwargs = {k: v for k, v in kwargs.items() if k in plot_hvsr.__code__.co_varnames} # hvsr_results['Plot_Report'] = plot_hvsr(hvsr_results, return_fig=True, show_plot=False, close_figs=True) pass else: pass # Export processed data if hvsr_export_path(as pickle currently, default .hvsr extension) if 'hvsr_export_path' in kwargs.keys(): if kwargs['hvsr_export_path'] is None: pass else: if 'ext' in kwargs.keys(): ext = kwargs['ext'] else: ext = 'hvsr' export_hvsr(hvsr_data=hvsr_results, hvsr_export_path=kwargs['hvsr_export_path'], ext=ext, verbose=verbose) if 'show_plot' in kwargs: if not kwargs['show_plot']: plt.close() return hvsr_resultsThe sprit.run() is the main function that allows you to do all your HVSR processing in one simple step (sprit.run() is how you would call it in your code, but it may also be called using sprit.sprit_hvsr.run())
The input_data parameter of sprit.run() is the only required parameter (if nothing entered, it will run sample data). This can be either a single file, a list of files (one for each component, for example), a directory (in which case, all obspy-readable files will be added to an HVSRBatch instance), a Rasp. Shake raw data directory, or sample data.
Notes
The sprit.run() function calls the following functions. This is the recommended order/set of functions to run to process HVSR using SpRIT. See the API documentation for these functions for more information: - input_params(): The input_data parameter of input_params() is the only required variable, though others may also need to be called for your data to process correctly. - fetch_data(): the source parameter of fetch_data() is the only explicit variable in the sprit.run() function aside from input_data and verbose. Everything else gets delivered to the correct function via the kwargs dictionary - remove_noise(): by default, the kind of noise removal is remove_method='auto'. See the remove_noise() documentation for more information. If remove_method is set to anything other than one of the explicit options in remove_noise, noise removal will not be carried out. - calculate_azimuth(): calculate one or several azimuths. Single azimuth can be a way to combine H components too. - generate_psds(): generates psds for each component, which will be combined/used later. Any parameter of obspy.signal.spectral_estimation.PPSD() may also be read into this function. - remove_outlier_curves(): removes any outlier ppsd curves so that the data quality for when curves are combined will be enhanced. See the remove_outlier_curves() documentation for more information. - process_hvsr(): this is the main function processing the hvsr curve and statistics. See process_hvsr() documentation for more details. The hvsr_band parameter sets the frequency spectrum over which these calculations occur. - check_peaks(): this is the main function that will find and 'score' peaks to get a best peak. The parameter peak_freq_range can be set to limit the frequencies within which peaks are checked and scored. - get_report(): this is the main function that will print, plot, and/or save the results of the data. See the get_report() API documentation for more information. - export_hvsr(): this function exports the final data output as a pickle file (by default, this pickle object has a .hvsr extension). This can be used to read data back into SpRIT without having to reprocess data.
Parameters
input_data:strorfilepath object that can be read by obspy- Filepath to data to be processed. This may be a file or directory, depending on what kind of data is being processed (this can be specified with the source parameter). For sample data, The following can be specified as the input_data parameter: - Any integer 1-6 (inclusive), or the string (e.g., input_data="1" or input_data=1 will work) - The word "sample" before any integer (e.g., input_data="sample1") - The word "sample" will default to "sample1" if source='file'. - If source='batch', input_data should be input_data='sample' or input_data='batch'. In this case, it will read and process all the sample files using the HVSRBatch class. Set verbose=True to see all the information in the sample batch csv file.
source:str, optional- description, by default 'file'
azimuth_calculation:bool, optional- Whether to perform azimuthal analysis, by default False.
noise_removal:bool, default=False- Whether to remove noise (before processing PPSDs)
outlier_curves_removal:bool, default=False- Whether to remove outlier curves from HVSR time windows
skip_steps:list, str,orNone- A list of function names to skip (as strings), to manually prevent any function from being performed. For example, skip_steps=["input_params", "fetch_data"] will prevent sprit.input_params() and sprit.fetch_data() from being called in sprit.run().
show_plot:bool, default=True- Whether to show plots. This does not affect whether the plots are created (and then inserted as an attribute of HVSRData), only whether they are shown.
verbose:bool, optional- description, by default False
**kwargs- Keyword arguments for the functions listed above. The keyword arguments are unique, so they will get parsed out and passed into the appropriate function.
input_params:function name (not an actual parameter)- Function for designating input parameters for reading in and processing data See API documentation: input_params()
input_data:any, default= '<no default>'- See API documentation at link above or at
help(input_params())for specifics. site:any, default= 'HVSRSite'- See API documentation at link above or at
help(input_params())for specifics. project:any, default= None- See API documentation at link above or at
help(input_params())for specifics. network:any, default= 'AM'- See API documentation at link above or at
help(input_params())for specifics. station:any, default= 'NONE'- See API documentation at link above or at
help(input_params())for specifics. location:any, default= '00'- See API documentation at link above or at
help(input_params())for specifics. channels:any, default= ['EHZ', 'EHN', 'EHE']- See API documentation at link above or at
help(input_params())for specifics. acq_date:any, default= None- See API documentation at link above or at
help(input_params())for specifics. starttime:any, default= None- See API documentation at link above or at
help(input_params())for specifics. endtime:any, default= None- See API documentation at link above or at
help(input_params())for specifics. tzone:any, default= 'UTC'- See API documentation at link above or at
help(input_params())for specifics. xcoord:any, default= -88.229- See API documentation at link above or at
help(input_params())for specifics. ycoord:any, default= 40.101- See API documentation at link above or at
help(input_params())for specifics. elevation:any, default= 225- See API documentation at link above or at
help(input_params())for specifics. input_crs:any, default= 'EPSG:4326'- See API documentation at link above or at
help(input_params())for specifics. output_crs:any, default= None- See API documentation at link above or at
help(input_params())for specifics. elev_unit:any, default= 'meters'- See API documentation at link above or at
help(input_params())for specifics. depth:any, default= 0- See API documentation at link above or at
help(input_params())for specifics. instrument:any, default= 'Seismometer'- See API documentation at link above or at
help(input_params())for specifics. metadata:any, default= None- See API documentation at link above or at
help(input_params())for specifics. hvsr_band:any, default= [0.5, 40]- See API documentation at link above or at
help(input_params())for specifics. peak_freq_range:any, default= [0.5, 40]- See API documentation at link above or at
help(input_params())for specifics. processing_parameters:any, default= {}- See API documentation at link above or at
help(input_params())for specifics. verbose:any, default= False- See API documentation at link above or at
help(input_params())for specifics. fetch_data:function name (not an actual parameter)- Fetch ambient seismic data from a source to read into obspy stream. See API documentation: fetch_data()
params:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(fetch_data())for specifics. source:any, default= 'file'- See API documentation at link above or at
help(fetch_data())for specifics. data_export_path:any, default= None- See API documentation at link above or at
help(fetch_data())for specifics. data_export_format:any, default= 'mseed'- See API documentation at link above or at
help(fetch_data())for specifics. detrend:any, default= 'spline'- See API documentation at link above or at
help(fetch_data())for specifics. detrend_options:any, default= 2- See API documentation at link above or at
help(fetch_data())for specifics. filter_type:any, default= None- See API documentation at link above or at
help(fetch_data())for specifics. filter_options:any, default= {}- See API documentation at link above or at
help(fetch_data())for specifics. update_metadata:any, default= True- See API documentation at link above or at
help(fetch_data())for specifics. plot_input_stream:any, default= False- See API documentation at link above or at
help(fetch_data())for specifics. plot_engine:any, default= 'matplotlib'- See API documentation at link above or at
help(fetch_data())for specifics. show_plot:any, default= True- See API documentation at link above or at
help(fetch_data())for specifics. verbose:any, default= False- See API documentation at link above or at
help(fetch_data())for specifics. kwargs:any, default= {}- See API documentation at link above or at
help(fetch_data())for specifics. calculate_azimuth:function name (not an actual parameter)- Function to calculate azimuthal horizontal component at specified angle(s). See API documentation: calculate_azimuth()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(calculate_azimuth())for specifics. azimuth_angle:any, default= 45- See API documentation at link above or at
help(calculate_azimuth())for specifics. azimuth_type:any, default= 'multiple'- See API documentation at link above or at
help(calculate_azimuth())for specifics. azimuth_unit:any, default= 'degrees'- See API documentation at link above or at
help(calculate_azimuth())for specifics. show_az_plot:any, default= False- See API documentation at link above or at
help(calculate_azimuth())for specifics. verbose:any, default= False- See API documentation at link above or at
help(calculate_azimuth())for specifics. plot_azimuth_kwargs:any, default= {}- See API documentation at link above or at
help(calculate_azimuth())for specifics. remove_noise:function name (not an actual parameter)- Function to remove noisy windows from data, using various methods. See API documentation: remove_noise()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(remove_noise())for specifics. remove_method:any, default= None- See API documentation at link above or at
help(remove_noise())for specifics. processing_window:any, default= None- See API documentation at link above or at
help(remove_noise())for specifics. sat_percent:any, default= 0.995- See API documentation at link above or at
help(remove_noise())for specifics. noise_percent:any, default= 0.8- See API documentation at link above or at
help(remove_noise())for specifics. sta:any, default= 2- See API documentation at link above or at
help(remove_noise())for specifics. lta:any, default= 30- See API documentation at link above or at
help(remove_noise())for specifics. stalta_thresh:any, default= [8, 16]- See API documentation at link above or at
help(remove_noise())for specifics. std_ratio_thresh:any, default= 2.0- See API documentation at link above or at
help(remove_noise())for specifics. std_window_size:any, default= 20.0- See API documentation at link above or at
help(remove_noise())for specifics. min_std_win:any, default= 5.0- See API documentation at link above or at
help(remove_noise())for specifics. warmup_time:any, default= 0- See API documentation at link above or at
help(remove_noise())for specifics. cooldown_time:any, default= 0- See API documentation at link above or at
help(remove_noise())for specifics. min_win_size:any, default= 1- See API documentation at link above or at
help(remove_noise())for specifics. remove_raw_noise:any, default= False- See API documentation at link above or at
help(remove_noise())for specifics. show_stalta_plot:any, default= False- See API documentation at link above or at
help(remove_noise())for specifics. verbose:any, default= False- See API documentation at link above or at
help(remove_noise())for specifics. generate_psds:function name (not an actual parameter)- Calculate Power Spectral Density (PSD) curves for each channel. See API documentation: generate_psds()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(generate_psds())for specifics. window_length:any, default= 30.0- See API documentation at link above or at
help(generate_psds())for specifics. overlap_pct:any, default= 0.5- See API documentation at link above or at
help(generate_psds())for specifics. window_type:any, default= 'hann'- See API documentation at link above or at
help(generate_psds())for specifics. window_length_method:any, default= 'length'- See API documentation at link above or at
help(generate_psds())for specifics. remove_response:any, default= False- See API documentation at link above or at
help(generate_psds())for specifics. skip_on_gaps:any, default= True- See API documentation at link above or at
help(generate_psds())for specifics. num_freq_bins:any, default= 512- See API documentation at link above or at
help(generate_psds())for specifics. hvsr_band:any, default= [0.5, 40]- See API documentation at link above or at
help(generate_psds())for specifics. obspy_ppsds:any, default= False- See API documentation at link above or at
help(generate_psds())for specifics. azimuthal_psds:any, default= False- See API documentation at link above or at
help(generate_psds())for specifics. verbose:any, default= False- See API documentation at link above or at
help(generate_psds())for specifics. plot_psds:any, default= False- See API documentation at link above or at
help(generate_psds())for specifics. obspy_ppsd_kwargs:any, default= {}- See API documentation at link above or at
help(generate_psds())for specifics. process_hvsr:function name (not an actual parameter)- Process the input data and get HVSR data See API documentation: process_hvsr()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(process_hvsr())for specifics. horizontal_method:any, default= None- See API documentation at link above or at
help(process_hvsr())for specifics. smooth:any, default= True- See API documentation at link above or at
help(process_hvsr())for specifics. freq_smooth:any, default= 'konno ohmachi'- See API documentation at link above or at
help(process_hvsr())for specifics. f_smooth_width:any, default= 40- See API documentation at link above or at
help(process_hvsr())for specifics. resample:any, default= True- See API documentation at link above or at
help(process_hvsr())for specifics. outlier_curve_percentile_threshold:any, default= False- See API documentation at link above or at
help(process_hvsr())for specifics. azimuth:any, default= None- See API documentation at link above or at
help(process_hvsr())for specifics. verbose:any, default= False- See API documentation at link above or at
help(process_hvsr())for specifics. remove_outlier_curves:function name (not an actual parameter)- Function used to remove outliers curves using a "prototype" or "dbscan" method. See API documentation: remove_outlier_curves()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(remove_outlier_curves())for specifics. outlier_method:any, default= 'prototype'- See API documentation at link above or at
help(remove_outlier_curves())for specifics. outlier_threshold:any, default= 50- See API documentation at link above or at
help(remove_outlier_curves())for specifics. use_percentile:any, default= True- See API documentation at link above or at
help(remove_outlier_curves())for specifics. min_pts:any, default= 5- See API documentation at link above or at
help(remove_outlier_curves())for specifics. use_hv_curves:any, default= False- See API documentation at link above or at
help(remove_outlier_curves())for specifics. plot_engine:any, default= 'matplotlib'- See API documentation at link above or at
help(remove_outlier_curves())for specifics. show_outlier_plot:any, default= False- See API documentation at link above or at
help(remove_outlier_curves())for specifics. generate_outlier_plot:any, default= True- See API documentation at link above or at
help(remove_outlier_curves())for specifics. verbose:any, default= False- See API documentation at link above or at
help(remove_outlier_curves())for specifics. kwargs:any, default= {}- See API documentation at link above or at
help(remove_outlier_curves())for specifics. check_peaks:function name (not an actual parameter)- Function to run tests on HVSR peaks to find best one and see if it passes SESAME quality checks See API documentation: check_peaks()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(check_peaks())for specifics. hvsr_band:any, default= [0.5, 40]- See API documentation at link above or at
help(check_peaks())for specifics. peak_selection:any, default= 'max'- See API documentation at link above or at
help(check_peaks())for specifics. peak_freq_range:any, default= [0.5, 40]- See API documentation at link above or at
help(check_peaks())for specifics. azimuth:any, default= 'HV'- See API documentation at link above or at
help(check_peaks())for specifics. verbose:any, default= False- See API documentation at link above or at
help(check_peaks())for specifics. get_report:function name (not an actual parameter)- Generate and/or print and/or export a report of the HVSR analysis in a variety of formats. See API documentation: get_report()
hvsr_results:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(get_report())for specifics. report_formats:any, default= ['print', 'table', 'plot', 'html', 'pdf']- See API documentation at link above or at
help(get_report())for specifics. azimuth:any, default= 'HV'- See API documentation at link above or at
help(get_report())for specifics. plot_type:any, default= 'HVSR p ann COMP+ p ann SPEC p ann'- See API documentation at link above or at
help(get_report())for specifics. plot_engine:any, default= 'matplotlib'- See API documentation at link above or at
help(get_report())for specifics. show_print_report:any, default= True- See API documentation at link above or at
help(get_report())for specifics. show_table_report:any, default= False- See API documentation at link above or at
help(get_report())for specifics. show_plot_report:any, default= False- See API documentation at link above or at
help(get_report())for specifics. show_html_report:any, default= False- See API documentation at link above or at
help(get_report())for specifics. show_pdf_report:any, default= True- See API documentation at link above or at
help(get_report())for specifics. suppress_report_outputs:any, default= False- See API documentation at link above or at
help(get_report())for specifics. show_report_outputs:any, default= False- See API documentation at link above or at
help(get_report())for specifics. csv_handling:any, default= 'append'- See API documentation at link above or at
help(get_report())for specifics. report_export_format:any, default= None- See API documentation at link above or at
help(get_report())for specifics. report_export_path:any, default= None- See API documentation at link above or at
help(get_report())for specifics. verbose:any, default= False- See API documentation at link above or at
help(get_report())for specifics. kwargs:any, default= {}- See API documentation at link above or at
help(get_report())for specifics. export_hvsr:function name (not an actual parameter)- Export data into pickle format that can be read back in using import_data(). See API documentation: export_hvsr()
hvsr_data:any, default= '<outputofprevious function>'- See API documentation at link above or at
help(export_hvsr())for specifics. hvsr_export_path:any, default= None- See API documentation at link above or at
help(export_hvsr())for specifics. ext:any, default= 'hvsr'- See API documentation at link above or at
help(export_hvsr())for specifics. export_type:any, default= 'gzip'- See API documentation at link above or at
help(export_hvsr())for specifics. export_plots:any, default= False- See API documentation at link above or at
help(export_hvsr())for specifics. verbose:any, default= False- See API documentation at link above or at
help(export_hvsr())for specifics.
Returns
hvsr_results:HVSRDataorHVSRBatch object- If a single file/data point is being processed, a HVSRData object will be returned. Otherwise, it will be a HVSRBatch object. See their documention for more information.
See Also
input_params()fetch_data()remove_noise()calculate_azimuth()generate_psds()remove_outlier_curves()process_hvsr()check_peaks()get_report()export_hvsr()Raises
RuntimeError- If the input parameter may not be read correctly. This is raised if the input_params() function fails. This raises an error since no other data processing or reading steps will be able to carried out correctly.
RuntimeError- If the data is not read/fetched correctly using fetch_data(), an error will be raised. This is raised if the fetch_data() function fails. This raises an error since no other data processing steps will be able to carried out correctly.
RuntimeError- If the data being processed is a single file, an error will be raised if generate_psds() does not work correctly. No errors are raised for remove_noise() errors (since that is an optional step) and the process_hvsr() step (since that is the last processing step) .
def test_function()-
Expand source code
def test_function(): print('is this working?') def update_elevation(hvsr_data, updated_surface_elevation, updated_elevation_unit)-
Expand source code
def update_elevation(hvsr_data, updated_surface_elevation, updated_elevation_unit): """Function to quickly update all attributes associated with elevation of an HVSRData object Parameters ---------- hvsr_data : HVSRData or HVSRBatch HVSRData or HVSRBatch object containing attributes related to elevation. If HVSRBatch, updated_surface_elevation should be list or tuple and updated_elevation_unit may either be str or list/tuple of strings. updated_surface_elevation : numbers.Number Number (float or int) with the updated elevation. Meters is the preferred unit. If feet are used instead, it will be converted to meters. updated_elevation_unit : str Unit used for updated_surface_elevation. If 'feet', it will be converted to meters. Returns ------- HVSRData HVSRData object with all attributes related to elevation updated """ # Break out for HVSRBatch if isinstance(hvsr_data, HVSRBatch): if len(updated_surface_elevation) != len(hvsr_data.sites): warnings.warn(f'Elevations for HVSRBatch object could not be updated. \ Length of updated_surface_elevation ({len(updated_surface_elevation)}) must equal\ the number of sites ({len(hvsr_data.sites)}) in hvsr_data') return hvsr_data if isinstance(updated_elevation_unit, (list, tuple)): if len(updated_elevation_unit) != len(hvsr_data.sites): warnings.warn(f'Elevations for HVSRBatch object could not be updated. \ Length of updated_elevation_unit ({len(updated_elevation_unit)}) must equal\ the number of sites ({len(hvsr_data.sites)}) in hvsr_data') return hvsr_data elif type(updated_elevation_unit) is str: updated_elevation_unit = [updated_elevation_unit] * len(hvsr_data.sites) else: warnings.warn(f"updated_elevation_unit must be list, tuple, or str, not {type(updated_elevation_unit)}") for i, sitename in enumerate( hvsr_data): hvsr_data[sitename] = _update_elevation(hvsr_data[sitename], updated_surface_elevation[i], updated_elevation_unit[i]) return hvsr_data #elevation_attrs = ['elevation', 'x_elev_m', 'x_elev_ft'] if hasattr(hvsr_data, 'elevation'): elev_diff = hvsr_data['elevation'] - updated_surface_elevation else: elev_diff = -1 * updated_surface_elevation mList = ['meters', 'm', 'standard', 'metric', 'si', 'metres', 'metre', 'meter'] fList = ['feet', 'ft', 'f', 'foot', 'american', 'imperial', 'imp'] # Update parameters with elevations in them if str(updated_elevation_unit).lower() in fList: updated_surface_elevation = updated_surface_elevation * 0.3048 hvsr_data['elevation'] = updated_surface_elevation hvsr_data['elev_unit'] = 'meters' if hasattr(hvsr_data, 'x_elev_m'): hvsr_data['x_elev_m']['Z'] = hvsr_data['x_elev_m']['Z'] - elev_diff hvsr_data['x_elev_m']['E'] = hvsr_data['x_elev_m']['E'] - elev_diff hvsr_data['x_elev_m']['N'] = hvsr_data['x_elev_m']['N'] - elev_diff hvsr_data['x_elev_ft']['Z'] = hvsr_data['x_elev_m']['Z'] / 0.3048 hvsr_data['x_elev_ft']['E'] = hvsr_data['x_elev_m']['E'] / 0.3048 hvsr_data['x_elev_ft']['N'] = hvsr_data['x_elev_m']['N'] / 0.3048 # Update elevations in Table_Report table_report_cols = ['Elevation', 'BedrockElevation'] if hasattr(hvsr_data, 'Table_Report'): hvsr_data.Table_Report['Elevation'] = updated_surface_elevation if 'BedrockDepth' in hvsr_data.Table_Report.columns: hvsr_data.Table_Report['BedrockElevation'] = updated_surface_elevation - hvsr_data.Table_Report['BedrockDepth'] # Update elevations in Print_Report if hasattr(hvsr_data, "Print_Report"): hvsr_data['Print_Report'] = re.sub(r"Elevation:\s*[\d.]+", f"Elevation: {updated_surface_elevation}", hvsr_data['Print_Report']) # Update elevations in HTML_Report if hasattr(hvsr_data, "HTML_Report"): hvsr_data['HTML_Report'] = re.sub(r"Elevation:\s*[\d.]+", f"Elevation: {updated_surface_elevation}", hvsr_data['HTML_Report']) # Update elevations in PeakReport attributes azList = ['HV'] azList.extend(list(hvsr_data.hvsr_az.keys())) for az in azList: for peakReport in hvsr_data.PeakReport[az]: if 'Table_Report' in peakReport['Report']: #This is a dict peakReport['Report']['Table_Report']['Elevation'] = updated_surface_elevation if 'BedrockDepth' in peakReport['Report']['Table_Report'].columns: peakReport['Report']['Table_Report']['BedrockElevation'] = updated_surface_elevation - peakReport['Report']['Table_Report']['BedrockDepth'] if 'Print_Report' in peakReport['Report']: #This is a dict peakReport['Report']['Print_Report'] = re.sub(r"Elevation:\s*[\d.]+", f"Elevation: {updated_surface_elevation}", peakReport['Report']['Print_Report']) # Update processing_parameters to reflect new elevations hvsr_data['processing_parameters']['fetch_data']['params']['elevation'] = updated_surface_elevation hvsr_data['processing_parameters']['fetch_data']['params']['elev_unit'] = 'meters' hvsr_data['processing_parameters']['fetch_data']['params']['params']['elevation'] = updated_surface_elevation hvsr_data['processing_parameters']['fetch_data']['params']['params']['elev_unit'] = 'meters' return hvsr_dataFunction to quickly update all attributes associated with elevation of an HVSRData object
Parameters
hvsr_data:HVSRDataorHVSRBatch- HVSRData or HVSRBatch object containing attributes related to elevation. If HVSRBatch, updated_surface_elevation should be list or tuple and updated_elevation_unit may either be str or list/tuple of strings.
updated_surface_elevation:numbers.Number- Number (float or int) with the updated elevation. Meters is the preferred unit. If feet are used instead, it will be converted to meters.
updated_elevation_unit:str- Unit used for updated_surface_elevation. If 'feet', it will be converted to meters.
Returns
HVSRData- HVSRData object with all attributes related to elevation updated
def update_resp_file(resp_file,
new_network,
new_station,
return_inv=True,
new_channels='CHZ',
new_location='',
starttime_new=None,
endtime_new=None,
new_resp_file=None,
existing_starttime='2015,001,00:00:00.0000',
existing_endtime='No Ending Time',
existing_network='XX',
existing_station='NS124',
existing_channel='CHZ',
existing_location='??')-
Expand source code
def update_resp_file(resp_file, new_network, new_station, return_inv=True, new_channels='CHZ', new_location="", starttime_new=None, endtime_new=None, new_resp_file=None, existing_starttime='2015,001,00:00:00.0000', existing_endtime="No Ending Time", existing_network='XX', existing_station='NS124', existing_channel='CHZ', existing_location='??'): """Function to update headers in .RESP instrument response files for easy copying. It is recommended to read this into a variable and set it as the metadata parameter of input_params if it is desired to correct for instrument response, for example. Parameters ---------- resp_file : str Filepath to input response file new_network : str Name of network to update header to. new_station : str Name of station to update header to. return_inv : bool, optional Whether to return an obspy inventory object. If False, a .RESP file will be saved in the same directory as resp_file, by default True new_channels : str, optional Name or list of channels to update the header to. If list, multiple inventory objects will be created/saved, by default 'CHZ' new_location : str, optional New instrument location attribute to update header to, by default "" starttime : obspy.UTCDateTime, optional Input to update starttime. Must be readable by obspy.UTCDateTime(), by default None endtime : obspy.UTCDateTime, optional Input to update endtime. Must be readable by obspy.UTCDateTime(), by default None new_resp_file : str, optional Filepath to designate for .RESP file output, if desired (and return_inv=False) If None, uses same directory as resp_file, by default None existing_network : str, optional Name of network as specified in input file, by default 'XX' existing_station : str, optional name of station as specified in input file, by default 'NS124' existing_channel : str, optional Name of channel as specified in input file, by default 'CHZ' existing_location : str, optional Name of location as specified in input file, by default '??' Returns ------- obspy.Inventory Only returned if return_inv = True """ with open(resp_file) as inFile: respTextIN = inFile.read() respText = respTextIN.replace(existing_network, new_network) respText = respText.replace(existing_station, new_station) respTextNoChann = respText.replace(existing_location, new_location) if not isinstance(new_channels, (list, tuple)): new_channels = [new_channels] if starttime_new is not None: sTime = obspy.UTCDateTime(starttime_new) sTimeText = existing_starttime.replace('2015,', str(sTime.year)+',') sTimeText = sTimeText.replace('001,', str(sTime.julday)+',') sTimeText = sTimeText.replace('00:00:00.0000', str(sTime.strftime("%H:%M:%S.%f"))) respTextNoChann = respTextNoChann.replace(existing_starttime, sTimeText) if endtime_new is not None: eTime = obspy.UTCDateTime(endtime_new) respTextNoChann = respTextNoChann.replace(existing_endtime, f"{eTime.year},{eTime.julday},{eTime.strftime('%H:%M:%S.%f')}") invList = [] for i, newcha in enumerate(new_channels): print(newcha) respText = respTextNoChann.replace(existing_channel, newcha) if return_inv: invList.append(obspy.read_inventory(io.StringIO(respText))) else: if new_resp_file is None: dir = pathlib.Path(resp_file).parent new_resp_file = dir.joinpath(f"RESP_{new_network}.{new_station}.{new_station}.{newcha}.resp") else: new_resp_file = pathlib.Path(new_resp_file) with open(new_resp_file.as_posix(), 'w') as outFile: outFile.write(new_resp_file.as_posix()) if return_inv: for i, r in enumerate(invList): if i == 0: inv = r else: inv = inv + r return invFunction to update headers in .RESP instrument response files for easy copying. It is recommended to read this into a variable and set it as the metadata parameter of input_params if it is desired to correct for instrument response, for example.
Parameters
resp_file:str- Filepath to input response file
new_network:str- Name of network to update header to.
new_station:str- Name of station to update header to.
return_inv:bool, optional- Whether to return an obspy inventory object. If False, a .RESP file will be saved in the same directory as resp_file, by default True
new_channels:str, optional- Name or list of channels to update the header to. If list, multiple inventory objects will be created/saved, by default 'CHZ'
new_location:str, optional- New instrument location attribute to update header to, by default ""
starttime:obspy.UTCDateTime, optional- Input to update starttime. Must be readable by obspy.UTCDateTime(), by default None
endtime:obspy.UTCDateTime, optional- Input to update endtime. Must be readable by obspy.UTCDateTime(), by default None
new_resp_file:str, optional- Filepath to designate for .RESP file output, if desired (and return_inv=False) If None, uses same directory as resp_file, by default None
existing_network:str, optional- Name of network as specified in input file, by default 'XX'
existing_station:str, optional- name of station as specified in input file, by default 'NS124'
existing_channel:str, optional- Name of channel as specified in input file, by default 'CHZ'
existing_location:str, optional- Name of location as specified in input file, by default '??'
Returns
obspy.Inventory- Only returned if return_inv = True
Classes
class HVSRBatch (*args, **kwargs)-
Expand source code
class HVSRBatch: """HVSRBatch is the data container used for batch processing. It contains several HVSRData objects (one for each site). These can be accessed using their site name, either square brackets (HVSRBatchVariable["SiteName"]) or the dot (HVSRBatchVariable.SiteName) accessor. The dot accessor may not work if there is a space in the site name. All of the functions in the sprit package are designed to perform the bulk of their operations iteratively on the individual HVSRData objects contained in the HVSRBatch object, and do little with the HVSRBatch object itself, besides using it determine which sites are contained within it. """ @check_instance def __init__(self, batch_input, batch_ext=None, batch_use=None, df_as_read=None): """HVSR Batch initializer Parameters ---------- batch_input : dict, list, tuple, HVSRData, or filepath(s) If: * dict, dictionary containing Key value pairs with {sitename: HVSRData object}. * list or tuple, assumed to be dicts, HVSRData objects, or filepaths to processed .hvsr files or seismic data to be processed. * HVSRData object, will transform into HVSRBatch object with single HVSRData object. The add() or append() methods, or using square brackes can be used to add additional sites. * filepaths, if: * If directory, will use `batch_ext` as the input to a `glob()` function to get all files in that directory and add them to batch. Defaults to '.hvsr' files if `batch_ext` not specified. * Filepath, will make a HVSRBatch object importing that single file, or if readable by pandas.read_csv() will use in conjunction with `batch_use` (see below) batch_ext : str or None Filepath extension to use in `glob()` function for filetypes to import, if batch_input is a filepath. batch_use : {dict, list, tuple, None} Intended to be used as dict with keys "site", "filepath", and "batch". In this case, should be {'site':"name_of_df_col_with_sitenames", 'filepath':"name_of_df_col_with_filepaths_to_data", 'batch':values_to_include}. values_to_include can be a value (or list of values) in a column called "batch" to specify that that row should be included in the HVSRBatch object or a dictionary where they keys are column names and the values are the values to look for in each column name for inclusion in HVSRBatch object. If not specified, defaults to None and uses all rows in dataframe. df_as_read : {None, pd.DataFrame} Used in various sprit functions to allow original DataFrame used to create HVSRBatch object to be carried through. """ # Just return it as-is if it's already Batch object if isinstance(batch_input, HVSRBatch): return batch_input self._batch_input = batch_input self.batch_input = self._batch_input self._batch_dict = self.batch_dict = {} self._input_df = df_as_read self.input_df = self._input_df self.batch = True if isinstance(batch_input, (list, tuple,)): # This is for a list/tuple with the following structure: # batch_input = [HVSRData, HVSRData, HVSRData] # or batch_input = ['/file/path1.hvsr', '/file/path2.hvsr'] # Can also be mixed: [HVSRData, '/file/path3/.hvsr'] siteNo = 0 zfilldigs = len(str(len(batch_input))) for hvdata in batch_input: if isinstance(hvdata, (dict, HVSRData)): if hasattr(hvdata, 'site'): sitename = hvdata.site elif hasattr(hvdata, 'Table_Report') and 'Site Name' in hvdata.Table_Report.columns: sitename = hvdata.Table_Report['Site Name'][0] else: sitename = f"HVSRSite{str(siteNo).zfill(zfilldigs)}" siteNo += 1 self.batch_dict[sitename] = hvdata elif pathlib.Path(hvdata).exists(): def _get_sitename(proposed_sitename, batch_dict): # Get unique site name based on stem j = 0 if proposed_sitename in batch_dict.keys(): # 100 is limit for index in range(100): if len(proposed_sitename.split('_')) <= index: if proposed_sitename.split('_')[-1].isdigit(): j = int(proposed_sitename.split('_')[-1]) + 1 sitenameList = proposed_sitename.split('_') sitenameList[-1] = str(j) proposed_sitename = '_'.join(sitenameList) break else: proposed_sitename = proposed_sitename+'_'+str(j) break j += 1 else: proposed_sitename = '_'.join(proposed_sitename.split('_')[:index+1]) return proposed_sitename if 'hvsr' in pathlib.Path(hvdata).suffix: sitename = pathlib.path(hvdata).stem sitename = _get_sitename(sitename, batch_dict) self.batch_dict[sitename] = hvdata elif pathlib.Path(hvdata).suffix.upper()[1:] in OBSPY_FORMATS: if verbose: print(f"Site specified for inclusion in HVSRBatch has not been processed. Processing. ({hvdata})") sitename = pathlib.Path(hvdata).stem sitename = _get_sitename(sitename, batch_dict) self.batch_dict[sitename] = run(pathlib.Path(hvdata).as_posix()) else: print(f"Could not parse Batch input. Excluding from HVSRBatch object: {hvdata}") elif isinstance(batch_input, dict): # This is for a dictionary with the following structure: # batch_input = {"SiteName1":HVSRData, "Sitename2":HVSRData} self.batch_dict = batch_input elif isinstance(batch_input, HVSRData): # If iniitializing HVSRBatch with single HVSRData self.batch_dict[batch_input['site']] = batch_input elif pathlib.Path(batch_input).exists(): # This is intended for filepaths if pathlib.Path(batch_input).is_dir(): if batch_ext is not None: batchfileglob = pathlib.Path(batch_input).glob("*."+batch_ext) batchfiledict = {} #if 'hvsr' in batch_ext: for hvfile in batchfileglob: currhvfile = import_data(hvfile) batchfiledict[currhvfile['site']] = currhvfile self.batch_dict = self._batch_dict = batchfiledict else: # Assume it is .hvsr file you wish to import batchfileglob = [] batchfiledict = {} batchfileglob = pathlib.Path(batch_input).glob("*") for hvfile in batchfileglob: if hvfile.as_posix().lower().endswith('hvsr'): currhvfile = import_data(hvfile.as_posix()) batchfiledict[currhvfile['site']] = currhvfile self.batch_dict = self._batch_dict = batchfiledict else: if '.hvsr' in pathlib.Path(batch_input).suffix: # In this case, assume this is alreayd a batch file and import/return it return import_data(batch_input) else: # For reading in a csv and specifying column map batch_df = pd.read_csv(batch_input) # Convert columns to lowercase batch_df.columns = [c.lower() for c in batch_df.columns] # This is for if dictionary mapping is not specified snList = ['site', 'sitename', 'sites', 'sitenames', 'identifier', 'batch', 'profile', 'crosssection', 'group'] pathList = ['hvsr_export_path', 'import_filepath', 'batch_input', 'filepath', 'input_data', 'path', 'filepath', 'filename', 'file', 'hvsrdata', 'hvsr', 'data'] siteCol = batch_df.columns[0] for sn in snList: if sn in snList: siteCol = sn break pathCol = batch_df.columns[1] for pa in pathList: if pa in pathList: pathCol = pa break def _read_data_into_batch(batch_df_row, site_col, path_col): if '.hvsr' in str(batch_df_row[path_col]): dataObj = import_data(str(batch_df_row[path_col])) elif pathlib.Path(batch_df_row[path_col]).suffix.upper()[1:] in OBSPY_FORMATS: dataObj = run(pathlib.Path(batch_df_row[path_col]).as_posix()) else: warnings.Warn(f"Batch input specified as site {batch_df_row[site_col]} cannot be read, skipping: {batch_df_row[path_col]}") dataObj = None return dataObj if isinstance(batch_use, dict): # Dictionary of {'site':"site_col", 'filepath':'path_col', 'batch':values_in_batch_col_to_include} if len(list(batch_use.keys())) != 3: warnMsg = f"batch_use dict should have three keys called 'site', 'filepath', and 'batch' (not {len(list(batch_use.keys()))}: {list(batch_use.keys())}). \n\t'batch' may be changed to name of column you are using to specify inclusion in HVSRBatch object, or input DataFrame should have column called 'batch'" warnings.Warn(warnMsg) # Should be site and filepath, but just in case for k in batch_use.keys(): if str(k).lower() in snList: siteCol = batch_use[k] siteKey = k if str(k).lower() in pathList: pathCol = batch_use[k] pathKey = k if str(k).lower() not in snList and str(k).lower() not in pathList: includeMe = batch_use[k] batchKey = k # Get subset df with only rows that we want #includeMe = batchCol#batch_use[batchCol] if isinstance(includeMe, (list, tuple)): sites_df = batch_df[batch_df[batchKey].isin(includeMe)] elif isinstance(includeMe, dict): sitesDFList = [] for batchCol, includeValue in includeMe.items(): sitesDFList.append(batch_df[batch_df[batchCol]==includeValue]) sites_df = pd.concat(sitesDFList, ignore_index=True) else: sites_df = batch_df[batch_df[batchKey]==includeMe] # Import, process, or otherwise read data into batch object for i, row in sites_df.iterrows(): dataObj = _read_data_into_batch(row, siteCol, pathCol) if dataObj is not None: self.batch_dict[str(row[siteCol])] = dataObj elif isinstance(batch_use, (list, tuple)): # This should be list/tuples of site names sites_df = batch_df[batch_df[siteCol].isin(batch_use)] for i, row in sites_df.iterrows(): dataObj = _read_data_into_batch(row, siteCol, pathCol) if dataObj is not None: self.batch_dict[str(row[siteCol])] = dataObj else: # Use all rows (as possible) print(f"**NOTE**: All data specified will be read into batch object, from: {batch_input}") for i, row in batch_df.iterrows(): dataObj = _read_data_into_batch(row, siteCol, pathCol) if dataObj is not None: self.batch_dict[str(row[siteCol])] = dataObj else: raise TypeError(f"The batch_input parameter of the HVSRBatch class must be a dict of parameters, list or tuple of HVSRData obejcts, or an HVSRData object itself. {type(batch_input)}") self._batch_dict = self.batch_dict for sitename, hvsrdata in self.batch_dict.items(): setattr(self, sitename, hvsrdata) self[sitename]['batch'] = True self.sites = list(self.batch_dict.keys()) # METHODS def __to_json(self, filepath): """Not yet implemented, but may allow import/export to json files in the future, rather than just .hvsr pickles Parameters ---------- filepath : filepath object Location to save HVSRBatch object as json """ # open the file with the given filepath with open(filepath, 'w') as f: # dump the JSON string to the file json.dump(self, f, default=lambda o: o.__dict__, sort_keys=True, indent=4) def add(self, hvsr_data): """Function to add HVSRData objects to existing HVSRBatch objects""" if isinstance(hvsr_data, (dict, HVSRData)): hvsr_data = [hvsr_data] if isinstance(hvsr_data, (list, tuple,)): siteNo = 0 zfilldigs = len(str(len(hvsr_data))) for hvdata in hvsr_data: sitename = f"HVSRSite{str(siteNo).zfill(zfilldigs)}" if hasattr(hvdata, 'site'): sitename = hvdata.site elif hasattr(hvdata, 'Table_Report') and 'Site Name' in hvdata.Table_Report.columns: sitename = hvdata.Table_Report['Site Name'][0] elif isinstance(hvdata, dict): if 'site' in hvdata.keys(): sitename = hvdata['site'] self[sitename] = hvsr_data def append(self, hvsr_data): """Alias of add()""" add(self, hvsr_data) def export(self, hvsr_export_path=True, ext='hvsr'): """Method to export HVSRData objects in HVSRBatch container to indivdual .hvsr pickle files. Parameters ---------- hvsr_export_path : filepath, default=True Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True ext : str, optional The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension. """ export_hvsr(hvsr_data=self, hvsr_export_path=hvsr_export_path, ext=ext) def keys(self): """Method to return the "keys" of the HVSRBatch object. For HVSRBatch objects, these are the site names. Functions similar to dict.keys(). Returns ------- dict_keys A dict_keys object listing the site names of each of the HVSRData objects contained in the HVSRBatch object """ return self.batch_dict.keys() def items(self): """Method to return both the site names and the HVSRData object as a set of dict_items tuples. Functions similar to dict.items(). Returns ------- _type_ _description_ """ return self.batch_dict.items() def copy(self, type='shallow'): """Make a copy of the HVSRBatch object. Uses python copy module. Parameters ---------- type : str {'shallow', 'deep'} Based on input, creates either a shallow or deep copy of the HVSRBatch object. Shallow is equivalent of copy.copy(). Input of 'deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow. """ if type.lower()=='deep': return HVSRBatch(copy.deepcopy(self._batch_dict), df_as_read=self._input_df) else: return HVSRBatch(copy.copy(self._batch_dict), df_as_read=self._input_df) #Method wrapper of sprit.plot_hvsr function def plot(self, **kwargs): """Method to plot data, based on the sprit.plot_hvsr() function. All the same kwargs and default values apply as plot_hvsr(). For return_fig, returns it to the 'Plot_Report' attribute of each HVSRData object Returns ------- _type_ _description_ See Also -------- plot_hvsr """ for sitename in self: if 'return_fig' in kwargs.keys() and kwargs['return_fig']: self[sitename]['Plot_Report'] = plot_hvsr(self[sitename], **kwargs) else: plot_hvsr(self[sitename], **kwargs) return self def get_report(self, **kwargs): """Method to get report from processed data, in print, graphical, or tabular format. Returns ------- Variable May return nothing, pandas.Dataframe, or pyplot Figure, depending on input. See Also -------- get_report """ if 'report_formats' in kwargs.keys(): if 'table' == kwargs['report_formats']: for sitename in self: rowList = [] rowList.append(get_report(self[sitename], **kwargs)) return pd.concat(rowList, ignore_index=True) elif 'plot' == kwargs['report_formats']: plotDict = {} for sitename in self: if 'return_fig' in kwargs.keys() and kwargs['return_fig']: plotDict[sitename] = get_report(self[sitename], **kwargs) else: get_report(self[sitename], **kwargs) return plotDict #Only report_formats left is print, doesn't return anything, so doesn't matter if defalut or not for sitename in self: get_report(self[sitename], **kwargs) return def report(self, **kwargs): """Wrapper of get_report() See Also -------- get_report """ return self.get_report(**kwargs) def export_settings(self, site_name=None, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True): """Method to export settings from HVSRData object in HVSRBatch object. Simply calls sprit.export_settings() from specified HVSRData object in the HVSRBatch object. See sprit.export_settings() for more details. Parameters ---------- site_name : str, default=None The name of the site whose settings should be exported. If None, will default to the first site, by default None. export_settings_path : str, optional Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default' export_settings_type : str, {'all', 'instrument', 'processing'}, optional They type of settings to save, by default 'all' include_location : bool, optional Whether to include the location information in the instrument settings, if that settings type is selected, by default False verbose : bool, optional Whether to print output (filepath and settings) to terminal, by default True See Also -------- export_settings """ #If no site name selected, use first site if site_name is None: site_name = self.sites[0] export_settings(hvsr_data=self[site_name], export_settings_path=export_settings_path, export_settings_type=export_settings_type, include_location=include_location, verbose=verbose) def __iter__(self): return iter(self._batch_dict.keys()) def __setitem__(self, key, value): setattr(self, key, value) def __getitem__(self, key): return getattr(self, key)HVSRBatch is the data container used for batch processing. It contains several HVSRData objects (one for each site). These can be accessed using their site name, either square brackets (HVSRBatchVariable["SiteName"]) or the dot (HVSRBatchVariable.SiteName) accessor.
The dot accessor may not work if there is a space in the site name.
All of the functions in the sprit package are designed to perform the bulk of their operations iteratively on the individual HVSRData objects contained in the HVSRBatch object, and do little with the HVSRBatch object itself, besides using it determine which sites are contained within it.
Methods
def add(self, hvsr_data)-
Expand source code
def add(self, hvsr_data): """Function to add HVSRData objects to existing HVSRBatch objects""" if isinstance(hvsr_data, (dict, HVSRData)): hvsr_data = [hvsr_data] if isinstance(hvsr_data, (list, tuple,)): siteNo = 0 zfilldigs = len(str(len(hvsr_data))) for hvdata in hvsr_data: sitename = f"HVSRSite{str(siteNo).zfill(zfilldigs)}" if hasattr(hvdata, 'site'): sitename = hvdata.site elif hasattr(hvdata, 'Table_Report') and 'Site Name' in hvdata.Table_Report.columns: sitename = hvdata.Table_Report['Site Name'][0] elif isinstance(hvdata, dict): if 'site' in hvdata.keys(): sitename = hvdata['site'] self[sitename] = hvsr_dataFunction to add HVSRData objects to existing HVSRBatch objects
def append(self, hvsr_data)-
Expand source code
def append(self, hvsr_data): """Alias of add()""" add(self, hvsr_data)Alias of add()
def copy(self, type='shallow')-
Expand source code
def copy(self, type='shallow'): """Make a copy of the HVSRBatch object. Uses python copy module. Parameters ---------- type : str {'shallow', 'deep'} Based on input, creates either a shallow or deep copy of the HVSRBatch object. Shallow is equivalent of copy.copy(). Input of 'deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow. """ if type.lower()=='deep': return HVSRBatch(copy.deepcopy(self._batch_dict), df_as_read=self._input_df) else: return HVSRBatch(copy.copy(self._batch_dict), df_as_read=self._input_df)Make a copy of the HVSRBatch object. Uses python copy module.
Parameters
type:str {'shallow', 'deep'}- Based on input, creates either a shallow or deep copy of the HVSRBatch object. Shallow is equivalent of copy.copy(). Input of 'deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow.
def export(self, hvsr_export_path=True, ext='hvsr')-
Expand source code
def export(self, hvsr_export_path=True, ext='hvsr'): """Method to export HVSRData objects in HVSRBatch container to indivdual .hvsr pickle files. Parameters ---------- hvsr_export_path : filepath, default=True Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True ext : str, optional The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension. """ export_hvsr(hvsr_data=self, hvsr_export_path=hvsr_export_path, ext=ext)Method to export HVSRData objects in HVSRBatch container to indivdual .hvsr pickle files.
Parameters
hvsr_export_path:filepath, default=True- Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True
ext:str, optional- The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension.
def export_settings(self,
site_name=None,
export_settings_path='default',
export_settings_type='all',
include_location=False,
verbose=True)-
Expand source code
def export_settings(self, site_name=None, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True): """Method to export settings from HVSRData object in HVSRBatch object. Simply calls sprit.export_settings() from specified HVSRData object in the HVSRBatch object. See sprit.export_settings() for more details. Parameters ---------- site_name : str, default=None The name of the site whose settings should be exported. If None, will default to the first site, by default None. export_settings_path : str, optional Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default' export_settings_type : str, {'all', 'instrument', 'processing'}, optional They type of settings to save, by default 'all' include_location : bool, optional Whether to include the location information in the instrument settings, if that settings type is selected, by default False verbose : bool, optional Whether to print output (filepath and settings) to terminal, by default True See Also -------- export_settings """ #If no site name selected, use first site if site_name is None: site_name = self.sites[0] export_settings(hvsr_data=self[site_name], export_settings_path=export_settings_path, export_settings_type=export_settings_type, include_location=include_location, verbose=verbose)Method to export settings from HVSRData object in HVSRBatch object.
Simply calls sprit.export_settings() from specified HVSRData object in the HVSRBatch object. See sprit.export_settings() for more details.
Parameters
site_name:str, default=None- The name of the site whose settings should be exported. If None, will default to the first site, by default None.
export_settings_path:str, optional- Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default'
export_settings_type:str, {'all', 'instrument', 'processing'}, optional- They type of settings to save, by default 'all'
include_location:bool, optional- Whether to include the location information in the instrument settings, if that settings type is selected, by default False
verbose:bool, optional- Whether to print output (filepath and settings) to terminal, by default True
See Also
def get_report(self, **kwargs)-
Expand source code
def get_report(self, **kwargs): """Method to get report from processed data, in print, graphical, or tabular format. Returns ------- Variable May return nothing, pandas.Dataframe, or pyplot Figure, depending on input. See Also -------- get_report """ if 'report_formats' in kwargs.keys(): if 'table' == kwargs['report_formats']: for sitename in self: rowList = [] rowList.append(get_report(self[sitename], **kwargs)) return pd.concat(rowList, ignore_index=True) elif 'plot' == kwargs['report_formats']: plotDict = {} for sitename in self: if 'return_fig' in kwargs.keys() and kwargs['return_fig']: plotDict[sitename] = get_report(self[sitename], **kwargs) else: get_report(self[sitename], **kwargs) return plotDict #Only report_formats left is print, doesn't return anything, so doesn't matter if defalut or not for sitename in self: get_report(self[sitename], **kwargs) returnMethod to get report from processed data, in print, graphical, or tabular format.
Returns
Variable- May return nothing, pandas.Dataframe, or pyplot Figure, depending on input.
See Also
def items(self)-
Expand source code
def items(self): """Method to return both the site names and the HVSRData object as a set of dict_items tuples. Functions similar to dict.items(). Returns ------- _type_ _description_ """ return self.batch_dict.items()Method to return both the site names and the HVSRData object as a set of dict_items tuples. Functions similar to dict.items().
Returns
_type_- description
def keys(self)-
Expand source code
def keys(self): """Method to return the "keys" of the HVSRBatch object. For HVSRBatch objects, these are the site names. Functions similar to dict.keys(). Returns ------- dict_keys A dict_keys object listing the site names of each of the HVSRData objects contained in the HVSRBatch object """ return self.batch_dict.keys()Method to return the "keys" of the HVSRBatch object. For HVSRBatch objects, these are the site names. Functions similar to dict.keys().
Returns
dict_keys- A dict_keys object listing the site names of each of the HVSRData objects contained in the HVSRBatch object
def plot(self, **kwargs)-
Expand source code
def plot(self, **kwargs): """Method to plot data, based on the sprit.plot_hvsr() function. All the same kwargs and default values apply as plot_hvsr(). For return_fig, returns it to the 'Plot_Report' attribute of each HVSRData object Returns ------- _type_ _description_ See Also -------- plot_hvsr """ for sitename in self: if 'return_fig' in kwargs.keys() and kwargs['return_fig']: self[sitename]['Plot_Report'] = plot_hvsr(self[sitename], **kwargs) else: plot_hvsr(self[sitename], **kwargs) return selfMethod to plot data, based on the sprit.plot_hvsr() function.
All the same kwargs and default values apply as plot_hvsr(). For return_fig, returns it to the 'Plot_Report' attribute of each HVSRData object
Returns
_type_- description
See Also
def report(self, **kwargs)-
Expand source code
def report(self, **kwargs): """Wrapper of get_report() See Also -------- get_report """ return self.get_report(**kwargs)
class HVSRData (*args, **kwargs)-
Expand source code
class HVSRData: """HVSRData is the basic data class of the sprit package. It contains all the processed data, input parameters, and reports. These attributes and objects can be accessed using square brackets or the dot accessor. For example, to access the site name, HVSRData['site'] and HVSRData.site will both return the site name. Some of the methods that work on the HVSRData object (e.g., .plot() and .get_report()) are essentially wrappers for some of the main sprit package functions (sprit.plot_hvsr() and sprit.get_report(), respectively) """ @check_instance def __init__(self, params): self.params = params self.batch = False #self.tsteps_used = [] for key, value in params.items(): setattr(self, key, value) if key == 'input_params': for k, v in params[key].items(): setattr(self, k, v) self.processing_status = {'input_params_status': None, 'fetch_data_status': None, 'calculate_azimuths_status': None, 'remove_noise_status': None, 'generate_psds_status': None, 'process_hvsr_status': None, 'remove_outlier_curves_status': None, 'overall_status': False} def __setitem__(self, key, value): setattr(self, key, value) def __getitem__(self, key): return getattr(self, key) def __str__(self): attrsToUse = ['project', 'site', 'instrument', 'network', 'station', 'location', 'channels', 'acq_date', 'starttime', 'endtime', 'xcoord', 'ycoord', 'input_crs', 'elevation', 'elev_unit', ] if not all([atu in self.keys() for atu in attrsToUse]): return 'String representation cannot be generated. Object not instatianted correctly using sprit.input_params()' def __get_ip_default(parameter): if parameter in inspect.signature(input_params).parameters: return inspect.signature(input_params).parameters[parameter].default elif parameter in params: return params[parameter] else: return parameter # Get title lines formatted if self.project == __get_ip_default('project'): projStr = 'No project specified' else: projStr = self.project hvsrIDStr = '' if hasattr(self, 'hvsr_id'): hvsrIDStr = self.hvsr_id elif 'hvsr_id' in params: hvsrIDStr = params['hvsr_id'] titleInfoStr =f"\nSpRIT HVSR DATA INFORMATION\n" titleLen = len(titleInfoStr) bigLineBreak = "—"*titleLen+ '\n' titleInfoStr += bigLineBreak titleInfoStr += f"Site Name: {self.site}\nProject: ({projStr})\n" titleInfoStr = f"{titleInfoStr}HVSRID (autogenerated): {hvsrIDStr}\n" titleInfoStr += bigLineBreak # Acquisition instrument information instInfoStr = "\n\nINSTRUMENT INFO\n" instInfoStr += '-'*(len(instInfoStr)-3) + '\n' instStr = f"Instrument in use: {self.instrument}" if self.instrument == __get_ip_default('instrument'): instStr = 'No instrument type specified' netStr = self.network staStr = self.station locStr = self.location chaStr = self.channels if chaStr == __get_ip_default('channels'): chaStr = f'No channels specified (using {chaStr})' acqInstStr = instInfoStr acqInstStr += f"{instStr}" acqInstStr += f"\n\tInstrument ID: {netStr}.{staStr}.{locStr}" acqInstStr += f"\n\t\tChannels: {chaStr}" # Acquisition site information xcoordINStr = self.xcoord_input xcoordStr = self.xcoord lonStr = self.longitude ycoordINstr = self.ycoord_input ycoordStr = self.ycoord latStr = self.latitude inCRSStr = self.input_crs outCRSStr = self.output_crs inputLocStr = f"{xcoordINStr}, {ycoordINstr} (as input in {inCRSStr})\n" transLocStr = '' if inCRSStr != outCRSStr: transLocStr = f"{xcoordStr}, {ycoordstr} (transformed to output_crs: {outCRSStr})\n" wgs84Str = f"{lonStr:.5f}°, {latStr:.5f}° | Lon/Lat in WGS84 (EPSG:4326)" siteLocInfoStr = "\n\nSITE INFO\n" siteLocInfoStr += '-'*(len(siteLocInfoStr)-3) + '\n' siteLocInfoStr += inputLocStr + transLocStr + wgs84Str # Acquistion time information acqTimeStr = "\n\nACQUISITION TIME\n" acqTimeStr += '-'*(len(acqTimeStr)-3) + '\n' aDateStr = self.acq_date sTimeStr = self.starttime eTimeStr = self.endtime if hasattr(self, 'stream'): dataST = self.stream utcSTime = dataST[0].stats.starttime utcETime = dataST[0].stats.endtime else: utcSTime = self.starttime utcETime = self.endtime minDur = int(str((utcETime - utcSTime)//60).split('.')[0]) secDur = float(round((((utcETime - utcSTime) / 60) - int(minDur)) * 60, 3)) if secDur >= 60: minDur += int(secDur//60) secDur = secDur - (secDur//60)*60 acqDurStr = f'Record duration: {minDur}:{secDur:06.3f} ({utcETime-utcSTime} seconds)' if aDateStr == __get_ip_default('acq_date') and sTimeStr == __get_ip_default('starttime'): acqTimeStr += 'No acquisition time specified.\n' else: acqTimeStr += f"Acquisition Date: {aDateStr}\n" acqTimeStr += f"\tStarted at: {sTimeStr}\n" acqTimeStr += f"\tEnded at : {eTimeStr}\n" acqTimeStr += acqDurStr # PEAK INFORMATION (IF CALCULATED) peakInfoStr = '' azimuth='HV' if 'BestPeak' in self.keys(): curvTestsPassed = (self['BestPeak'][azimuth]['PassList']['WinLen'] + self['BestPeak'][azimuth]['PassList']['SigCycles']+ self['BestPeak'][azimuth]['PassList']['LowCurveStD']) curvePass = curvTestsPassed > 2 #Peak Pass? peakTestsPassed = ( self['BestPeak'][azimuth]['PassList']['ProminenceLow'] + self['BestPeak'][azimuth]['PassList']['ProminenceHi']+ self['BestPeak'][azimuth]['PassList']['AmpClarity']+ self['BestPeak'][azimuth]['PassList']['FreqStability']+ self['BestPeak'][azimuth]['PassList']['LowStDev_Freq']+ self['BestPeak'][azimuth]['PassList']['LowStDev_Amp']) peakPass = peakTestsPassed >= 5 peakInfoStr = "\nCALCULATED F₀\n" peakInfoStr += "-"*(len(peakInfoStr) - 3) + '\n' peakInfoStr += '{0:.3f} Hz ± {1:.4f} Hz'.format(self['BestPeak'][azimuth]['f0'], float(self["BestPeak"][azimuth]['Sf'])) if curvePass and peakPass: peakInfoStr += '\n\t {} Peak at {} Hz passed SESAME quality tests! :D'.format(sprit_utils._check_mark(), round(self['BestPeak'][azimuth]['f0'],3)) else: peakInfoStr += '\n\t {} Peak at {} Hz did NOT pass SESAME quality tests :('.format(sprit_utils._x_mark(), round(self['BestPeak'][azimuth]['f0'],3)) else: peakInfoStr = 'F₀ not Calculated' printList = [ titleInfoStr, peakInfoStr, acqInstStr, siteLocInfoStr, acqTimeStr ] strRep = '' for ps in printList: strRep += ps return strRep #try: # Check if running in IPython environment # from IPython.display import display, HTML # return f"<b>Person Information:</b><br>Name: {self.name}<br>Age: {self.age}" #except ImportError: # Fallback for terminal/console # return f"Person Information:\nName: {self.name}\nAge: {self.age}" def __repr__(self): return self.__str__() # METHODS (many reflect dictionary methods) def to_json(self, json_filepath=None, export_json=True, return_json=False, **kwargs): """Not yet supported, will export HVSRData object to json""" class_keys_to_convert = (datetime.date, obspy.UTCDateTime, datetime.time, CRS, obspy.Inventory) def iterative_json_parser(input_attrib=self, level=0): outValue = input_attrib if isinstance(input_attrib, dict): # simplified condition for demo # if isinstance(input_attrib, (dict, sprit.HVSRData)): # use this line instead outValue = {} level += 1 for i, (key, value) in enumerate(input_attrib.items()): outKey = key print(level, "".join([' ']*level), outKey) if not isinstance(outKey, (str, int, float, bool, type(None))): outKey = str(outKey) # Recursively process the value processed_value = iterative_json_parser(value, level) # Apply string conversion if needed if isinstance(processed_value, class_keys_to_convert): processed_value = str(processed_value) outValue[outKey] = processed_value return outValue elif isinstance(input_attrib, list): outValue = [] for item in input_attrib: if isinstance(item, np.ndarray): outValue.append(item.tolist()) else: # Recursively process list items outValue.append(iterative_json_parser(item, level)) return outValue elif isinstance(input_attrib, np.ndarray): outValue = input_attrib.tolist() return outValue elif isinstance(input_attrib, pd.DataFrame): # Convert DataFrame to dict, but then recursively process it dict_value = input_attrib.to_dict() return iterative_json_parser(dict_value, level) elif isinstance(input_attrib, class_keys_to_convert): return str(input_attrib) else: return input_attrib sKeys = True if 'sort_keys' in kwargs: sKeys = kwargs['sort_keys'] del kwargs['sort_keys'] indent = 4 if 'indent' in kwargs: indent = kwargs['indent'] del kwargs['indent'] if export_json and json_filepath is not None: with open(json_filepath, 'w') as f: # dump the JSON string to the file json.dump(self, fp=f, default=iterative_json_parser, sort_keys=True, indent=indent, **kwargs) if return_json or json_filepath is None: return json.dumps(self, default=iterative_json_parser, sort_keys=True, indent=indent, **kwargs) def export(self, **kwargs): """Method to export HVSRData objects to .hvsr pickle files. Parameters ---------- hvsr_export_path : filepath, default=True Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True ext : str, optional The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension. See Also -------- export_hvsr """ if 'hvsr_data' in kwargs: del kwargs['hvsr_data'] export_hvsr(hvsr_data=self, **kwargs) def copy(self, copy_type='shallow'): """Make a copy of the HVSRData object. Uses python copy module. Parameters ---------- copy_type : str {'shallow', 'deep'} Based on input, creates either a shallow or deep copy of the HVSRData object. Shallow is equivalent of copy.copy(). Input of copy_type='deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow. """ if copy_type.lower() == 'deep': return copy.deepcopy(self) else: return HVSRData(copy.copy(self.params)) def export_settings(self, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True): """Method to export settings from HVSRData object. Simply calls sprit.export_settings() from the HVSRData object. See sprit.export_settings() for more details. Parameters ---------- export_settings_path : str, optional Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default' export_settings_type : str, {'all', 'instrument', 'processing'}, optional They type of settings to save, by default 'all' include_location : bool, optional Whether to include the location information in the instrument settings, if that settings type is selected, by default False verbose : bool, optional Whether to print output (filepath and settings) to terminal, by default True """ export_settings(hvsr_data=self, export_settings_path=export_settings_path, export_settings_type=export_settings_type, include_location=include_location, verbose=verbose) def get_report(self, **kwargs): """Method to get report from processed data, in print, graphical, or tabular format. Returns ------- Variable May return nothing, pandas.Dataframe, or pyplot Figure, depending on input. See Also -------- get_report """ report_return = get_report(hvsr_results=self, **kwargs) return report_return def items(self): """Method to return the "items" of the HVSRData object. For HVSRData objects, this is a dict_items object with the keys and values in tuples. Functions similar to dict.items(). Returns ------- dict_items A dict_items object of the HVSRData objects attributes, parameters, etc. """ return self.params.items() def keys(self): """Method to return the "keys" of the HVSRData object. For HVSRData objects, these are the attributes and parameters of the object. Functions similar to dict.keys(). Returns ------- dict_keys A dict_keys object of the HVSRData objects attributes, parameters, etc. """ keyList = [] for k in dir(self): if not k.startswith('_'): keyList.append(k) return keyList def plot(self, **kwargs): """Method to plot data, wrapper of sprit.plot_hvsr() Returns ------- matplotlib.Figure, matplotlib.Axis (if return_fig=True) See Also -------- plot_hvsr plot_azimuth """ if 'close_figs' not in kwargs.keys(): kwargs['close_figs']=True plot_return = plot_hvsr(self, **kwargs) plt.show() return plot_return def report(self, **kwargs): """Wrapper of get_report() See Also -------- get_report """ report_return = get_report(hvsr_results=self, **kwargs) return report_return def select(self, **kwargs): """Wrapper for obspy select method on 'stream' attribute of HVSRData object""" if hasattr(self, 'stream'): stream = self['stream'].select(**kwargs) return stream else: warnings.Warn("HVSRData.select() method applied, but 'stream' attribute (obspy.Stream object) not found") # ATTRIBUTES @property def params(self): """Dictionary containing the parameters used to process the data Returns ------- dict Dictionary containing the process parameters """ return self._params @params.setter def params(self, value): if not (isinstance(value, dict)): raise ValueError("params must be a dict type, currently passing {} type.".format(type(value))) self._params = value # batch @property def batch(self): """Whether this HVSRData object is part of an HVSRBatch object. This is used throughout the code to help direct the object into the proper processing pipeline. Returns ------- bool True if HVSRData object is part of HVSRBatch object, otherwise, False """ return self._batch @batch.setter def batch(self, value): if value == 0: value = False elif value == 1: value = True else: value = None if not isinstance(value, bool): raise ValueError("batch must be boolean type") self._batch = value #PPSD object from obspy (static) @property def ppsds_obspy(self): """The original ppsd information from the obspy.signal.spectral_estimation.PPSD(), so as to keep original if copy is manipulated/changed.""" return self._ppsds_obspy @ppsds_obspy.setter def ppsds_obspy(self, value): """Checks whether the ppsd_obspy is of the proper type before saving as attribute""" if not isinstance(value, obspy.signal.spectral_estimation.PPSD): if not isinstance(value, dict): raise ValueError("ppsds_obspy must be obspy.PPSD or dict with osbpy.PPSDs") else: for key in value.keys(): if not isinstance(value[key], obspy.signal.spectral_estimation.PPSD): raise ValueError("ppsds_obspy must be obspy.PPSD or dict with osbpy.PPSDs") self._ppsds_obspy=value #PPSD dict, copied from obspy ppsds (dynamic) @property def ppsds(self): """Dictionary copy of the class object obspy.signal.spectral_estimation.PPSD(). The dictionary copy allows manipulation of the data in PPSD, whereas that data cannot be easily manipulated in the original Obspy object. Returns ------- dict Dictionary copy of the PPSD information from generate_psds() """ return self._ppsds @ppsds.setter def ppsds(self, value): if not isinstance(value, dict): raise ValueError("ppsds dict with infomration from osbpy.PPSD (created by sprit.generate_psds())") self._ppsds=valueHVSRData is the basic data class of the sprit package. It contains all the processed data, input parameters, and reports.
These attributes and objects can be accessed using square brackets or the dot accessor. For example, to access the site name, HVSRData['site'] and HVSRData.site will both return the site name.
Some of the methods that work on the HVSRData object (e.g., .plot() and .get_report()) are essentially wrappers for some of the main sprit package functions (sprit.plot_hvsr() and sprit.get_report(), respectively)
Instance variables
prop batch-
Expand source code
@property def batch(self): """Whether this HVSRData object is part of an HVSRBatch object. This is used throughout the code to help direct the object into the proper processing pipeline. Returns ------- bool True if HVSRData object is part of HVSRBatch object, otherwise, False """ return self._batchWhether this HVSRData object is part of an HVSRBatch object. This is used throughout the code to help direct the object into the proper processing pipeline.
Returns
bool- True if HVSRData object is part of HVSRBatch object, otherwise, False
prop params-
Expand source code
@property def params(self): """Dictionary containing the parameters used to process the data Returns ------- dict Dictionary containing the process parameters """ return self._paramsDictionary containing the parameters used to process the data
Returns
dict- Dictionary containing the process parameters
prop ppsds-
Expand source code
@property def ppsds(self): """Dictionary copy of the class object obspy.signal.spectral_estimation.PPSD(). The dictionary copy allows manipulation of the data in PPSD, whereas that data cannot be easily manipulated in the original Obspy object. Returns ------- dict Dictionary copy of the PPSD information from generate_psds() """ return self._ppsdsDictionary copy of the class object obspy.signal.spectral_estimation.PPSD(). The dictionary copy allows manipulation of the data in PPSD, whereas that data cannot be easily manipulated in the original Obspy object.
Returns
dict- Dictionary copy of the PPSD information from generate_psds()
prop ppsds_obspy-
Expand source code
@property def ppsds_obspy(self): """The original ppsd information from the obspy.signal.spectral_estimation.PPSD(), so as to keep original if copy is manipulated/changed.""" return self._ppsds_obspyThe original ppsd information from the obspy.signal.spectral_estimation.PPSD(), so as to keep original if copy is manipulated/changed.
Methods
def copy(self, copy_type='shallow')-
Expand source code
def copy(self, copy_type='shallow'): """Make a copy of the HVSRData object. Uses python copy module. Parameters ---------- copy_type : str {'shallow', 'deep'} Based on input, creates either a shallow or deep copy of the HVSRData object. Shallow is equivalent of copy.copy(). Input of copy_type='deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow. """ if copy_type.lower() == 'deep': return copy.deepcopy(self) else: return HVSRData(copy.copy(self.params))Make a copy of the HVSRData object. Uses python copy module.
Parameters
copy_type:str {'shallow', 'deep'}- Based on input, creates either a shallow or deep copy of the HVSRData object. Shallow is equivalent of copy.copy(). Input of copy_type='deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow.
def export(self, **kwargs)-
Expand source code
def export(self, **kwargs): """Method to export HVSRData objects to .hvsr pickle files. Parameters ---------- hvsr_export_path : filepath, default=True Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True ext : str, optional The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension. See Also -------- export_hvsr """ if 'hvsr_data' in kwargs: del kwargs['hvsr_data'] export_hvsr(hvsr_data=self, **kwargs)Method to export HVSRData objects to .hvsr pickle files.
Parameters
hvsr_export_path:filepath, default=True- Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True
ext:str, optional- The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension.
See Also
def export_settings(self,
export_settings_path='default',
export_settings_type='all',
include_location=False,
verbose=True)-
Expand source code
def export_settings(self, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True): """Method to export settings from HVSRData object. Simply calls sprit.export_settings() from the HVSRData object. See sprit.export_settings() for more details. Parameters ---------- export_settings_path : str, optional Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default' export_settings_type : str, {'all', 'instrument', 'processing'}, optional They type of settings to save, by default 'all' include_location : bool, optional Whether to include the location information in the instrument settings, if that settings type is selected, by default False verbose : bool, optional Whether to print output (filepath and settings) to terminal, by default True """ export_settings(hvsr_data=self, export_settings_path=export_settings_path, export_settings_type=export_settings_type, include_location=include_location, verbose=verbose)Method to export settings from HVSRData object. Simply calls sprit.export_settings() from the HVSRData object. See sprit.export_settings() for more details.
Parameters
export_settings_path:str, optional- Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default'
export_settings_type:str, {'all', 'instrument', 'processing'}, optional- They type of settings to save, by default 'all'
include_location:bool, optional- Whether to include the location information in the instrument settings, if that settings type is selected, by default False
verbose:bool, optional- Whether to print output (filepath and settings) to terminal, by default True
def get_report(self, **kwargs)-
Expand source code
def get_report(self, **kwargs): """Method to get report from processed data, in print, graphical, or tabular format. Returns ------- Variable May return nothing, pandas.Dataframe, or pyplot Figure, depending on input. See Also -------- get_report """ report_return = get_report(hvsr_results=self, **kwargs) return report_returnMethod to get report from processed data, in print, graphical, or tabular format.
Returns
Variable- May return nothing, pandas.Dataframe, or pyplot Figure, depending on input.
See Also
def items(self)-
Expand source code
def items(self): """Method to return the "items" of the HVSRData object. For HVSRData objects, this is a dict_items object with the keys and values in tuples. Functions similar to dict.items(). Returns ------- dict_items A dict_items object of the HVSRData objects attributes, parameters, etc. """ return self.params.items()Method to return the "items" of the HVSRData object. For HVSRData objects, this is a dict_items object with the keys and values in tuples. Functions similar to dict.items().
Returns
dict_items- A dict_items object of the HVSRData objects attributes, parameters, etc.
def keys(self)-
Expand source code
def keys(self): """Method to return the "keys" of the HVSRData object. For HVSRData objects, these are the attributes and parameters of the object. Functions similar to dict.keys(). Returns ------- dict_keys A dict_keys object of the HVSRData objects attributes, parameters, etc. """ keyList = [] for k in dir(self): if not k.startswith('_'): keyList.append(k) return keyListMethod to return the "keys" of the HVSRData object. For HVSRData objects, these are the attributes and parameters of the object. Functions similar to dict.keys().
Returns
dict_keys- A dict_keys object of the HVSRData objects attributes, parameters, etc.
def plot(self, **kwargs)-
Expand source code
def plot(self, **kwargs): """Method to plot data, wrapper of sprit.plot_hvsr() Returns ------- matplotlib.Figure, matplotlib.Axis (if return_fig=True) See Also -------- plot_hvsr plot_azimuth """ if 'close_figs' not in kwargs.keys(): kwargs['close_figs']=True plot_return = plot_hvsr(self, **kwargs) plt.show() return plot_returnMethod to plot data, wrapper of sprit.plot_hvsr()
Returns
matplotlib.Figure, matplotlib.Axis (if return_fig=True)
See Also
def report(self, **kwargs)-
Expand source code
def report(self, **kwargs): """Wrapper of get_report() See Also -------- get_report """ report_return = get_report(hvsr_results=self, **kwargs) return report_return def select(self, **kwargs)-
Expand source code
def select(self, **kwargs): """Wrapper for obspy select method on 'stream' attribute of HVSRData object""" if hasattr(self, 'stream'): stream = self['stream'].select(**kwargs) return stream else: warnings.Warn("HVSRData.select() method applied, but 'stream' attribute (obspy.Stream object) not found")Wrapper for obspy select method on 'stream' attribute of HVSRData object
def to_json(self, json_filepath=None, export_json=True, return_json=False, **kwargs)-
Expand source code
def to_json(self, json_filepath=None, export_json=True, return_json=False, **kwargs): """Not yet supported, will export HVSRData object to json""" class_keys_to_convert = (datetime.date, obspy.UTCDateTime, datetime.time, CRS, obspy.Inventory) def iterative_json_parser(input_attrib=self, level=0): outValue = input_attrib if isinstance(input_attrib, dict): # simplified condition for demo # if isinstance(input_attrib, (dict, sprit.HVSRData)): # use this line instead outValue = {} level += 1 for i, (key, value) in enumerate(input_attrib.items()): outKey = key print(level, "".join([' ']*level), outKey) if not isinstance(outKey, (str, int, float, bool, type(None))): outKey = str(outKey) # Recursively process the value processed_value = iterative_json_parser(value, level) # Apply string conversion if needed if isinstance(processed_value, class_keys_to_convert): processed_value = str(processed_value) outValue[outKey] = processed_value return outValue elif isinstance(input_attrib, list): outValue = [] for item in input_attrib: if isinstance(item, np.ndarray): outValue.append(item.tolist()) else: # Recursively process list items outValue.append(iterative_json_parser(item, level)) return outValue elif isinstance(input_attrib, np.ndarray): outValue = input_attrib.tolist() return outValue elif isinstance(input_attrib, pd.DataFrame): # Convert DataFrame to dict, but then recursively process it dict_value = input_attrib.to_dict() return iterative_json_parser(dict_value, level) elif isinstance(input_attrib, class_keys_to_convert): return str(input_attrib) else: return input_attrib sKeys = True if 'sort_keys' in kwargs: sKeys = kwargs['sort_keys'] del kwargs['sort_keys'] indent = 4 if 'indent' in kwargs: indent = kwargs['indent'] del kwargs['indent'] if export_json and json_filepath is not None: with open(json_filepath, 'w') as f: # dump the JSON string to the file json.dump(self, fp=f, default=iterative_json_parser, sort_keys=True, indent=indent, **kwargs) if return_json or json_filepath is None: return json.dumps(self, default=iterative_json_parser, sort_keys=True, indent=indent, **kwargs)Not yet supported, will export HVSRData object to json