import os import numpy as np import scipy.io import glob import sys from datetime import datetime, timezone from netCDF4 import Dataset from collections import Counter ############################################################################### # This file contains the following functions: # write_output() # find_last_time() ############################################################################### def write_output(vip, globatt, xret, dindices, prior, fsample, exectime, nfilename, shour, verbose): success = 0 # If fsample is zero, then we will create the netCDF file if fsample == 0: dt = datetime.utcfromtimestamp(xret['secs']) hh = datetime.utcfromtimestamp(xret['secs']).hour nn = datetime.utcfromtimestamp(xret['secs']).minute ss = datetime.utcfromtimestamp(xret['secs']).second hms = hh*10000 + nn*100 + ss nfilename = vip['output_path'] + '/' + vip['output_rootname'] + '.' + dt.strftime('%Y%m%d.%H%M%S') + '.nc' if ((os.path.exists(nfilename)) and (vip['output_clobber'] == 0)): print('Error: output file exists -- aborting (' + nfilename + ')') return success, nfilename elif os.path.exists(nfilename): print('Warning: clobbering existing output file (' + nfilename + ')') fid = Dataset(nfilename, 'w') tdim = fid.createDimension('time', None) nht = len(xret['z']) hdim = fid.createDimension('height',nht) ddim = fid.createDimension('dfs', len(xret['dfs'])) base_time = fid.createVariable('base_time', 'i4') base_time.long_name = 'Epoch time' base_time.units = 's since 1970/01/01 00:00:00 UTC' time_offset = fid.createVariable('time_offset', 'f8', ('time',)) time_offset.long_name = 'Time offset from base_time' time_offset.units = 's' hour = fid.createVariable('hour', 'f8', ('time',)) hour.long_name = 'Time' hour.units = 'Hours from 00:00 UTC' qc_flag = fid.createVariable('qc_flags', 'i2', ('time',)) qc_flag.long_name = 'Manual QC flag' qc_flag.units = 'unitless' qc_flag.comment = 'value of 0 implies quality is ok; non-zero values indicate that the sample has suspect quality' qc_flag.value_2 = 'Implies retrieval did not converge' qc_flag.value_3 = 'Implies retrieval converged but RMS between the observed and computed spectrum is too large' qc_flag.RMS_threshold_used_for_QC = str(vip['qc_rms_value']) height = fid.createVariable('height', 'f4', ('height',)) height.long_name = 'height' height.units = 'km AGL' u_wind = fid.createVariable('u_wind', 'f4', ('time','height',)) u_wind.long_name = 'U-component of wind' u_wind.units = 'm/s' v_wind = fid.createVariable('v_wind', 'f4', ('time','height',)) v_wind.long_name = 'V-component of wind' v_wind.units = 'm/s' w_wind = fid.createVariable('w_wind', 'f4', ('time','height',)) w_wind.long_name = 'W-component of wind' w_wind.units = 'm/s' sigmaU = fid.createVariable('sigma_u', 'f4', ('time','height',)) sigmaU.long_name = '1-sigma uncertainty in U-component of wind' sigmaU.units = 'm/s' sigmaV = fid.createVariable('sigma_v', 'f4', ('time','height',)) sigmaV.long_name = '1-sigma uncertainty in V-component of wind' sigmaV.units = 'm/s' sigmaW = fid.createVariable('sigma_w', 'f4', ('time','height',)) sigmaW.long_name = '1-sigma uncertainty in W-component of wind' sigmaW.units = 'm/s' converged_flag = fid.createVariable('converged_flag', 'i2', ('time',)) converged_flag.long_name = 'convergence flag' converged_flag.units = 'unitless' converged_flag.value_0 = '0 indicates no convergence' converged_flag.value_1 = '1 indicates convergence in Rodgers sense (i.e., di2n << nX)' converged_flag.value_2 = '2 indicates convergence (best rms after rms increased drastically' converged_flag.value_3 = '3 indicates convergence (best rms after max_iter)' converged_flag.value_4 = '4 indicates linear forward model so no iterations. Ignore rmsa, rmsp, chi2.' gamma = fid.createVariable('gamma', 'f4', ('time',)) gamma.long_name = 'gamma parameter' gamma.units = 'unitless' n_iter = fid.createVariable('n_iter', 'i2', ('time',)) n_iter.long_name = 'number of iterations performed' n_iter.units = 'unitless' rmsa = fid.createVariable('rmsa', 'f4', ('time',)) rmsa.long_name = 'root mean square error between AERI and MWR obs in the observation vector and forward calculation' rmsa.units = 'unitless' rmsa.comment1 = 'Computed as sqrt( sum_over_i[ ((Y_i - F(Xn_i)) / Y_i)^2 ] / sizeY)' rmsa.comment2 = 'Entire observation vector used in this calculation' rmsp = fid.createVariable('rmsp', 'f4', ('time',)) rmsp.long_name = 'root mean square error between prior T/q profile and the retrieved T/q profile' rmsp.units = 'unitless' rmsp.comment1 = 'Computed as sqrt( mean[ ((Xa - Xn) / sigma_Xa)^2 ] )' chi2 = fid.createVariable('chi2', 'f4', ('time',)) chi2.long_name = 'Chi-square statistic of Y vs. F(Xn)' chi2.units = 'unitless' chi2.comment = 'Computed as sqrt( sum_over_i[ ((Y_i - F(Xn_i)) / Y_i)^2 ] / sizeY)' convergence_criteria = fid.createVariable('convergence_criteria', 'f4', ('time',)) convergence_criteria.long_name = 'convergence criteria di^2' convergence_criteria.units = 'unitless' dfs = fid.createVariable('dfs', 'f4', ('time','dfs',)) dfs.long_name = 'degrees of freedom of signal' dfs.units = 'unitless' dfs.comment = 'total DFS, then DFS for U and V' sic = fid.createVariable('sic', 'f4', ('time',)) sic.long_name = 'Shannon information content' sic.units = 'unitless' vres_u = fid.createVariable('vres_U', 'f4', ('time','height',)) vres_u.long_name = 'Vertical resolution of the U-wind profile' vres_u.units = 'km' vres_v = fid.createVariable('vres_V', 'f4', ('time','height',)) vres_v.long_name = 'Vertical resolution of the V-wind profile' vres_v.units = 'km' vres_w = fid.createVariable('vres_W', 'f4', ('time','height',)) vres_w.long_name = 'Vertical resolution of the W-wind profile' vres_w.units = 'km' cdfs_u = fid.createVariable('cdfs_U', 'f4', ('time','height',)) cdfs_u.long_name = 'Vertical profile of the cumulative degrees of freedom of signal for U-wind' cdfs_u.units = 'unitless' cdfs_v = fid.createVariable('cdfs_V', 'f4', ('time','height',)) cdfs_v.long_name = 'Vertical profile of the cumulative degrees of freedom of signal for V-wind' cdfs_v.units = 'unitless' cdfs_w = fid.createVariable('cdfs_W', 'f4', ('time','height',)) cdfs_w.long_name = 'Vertical profile of the cumulative degrees of freedom of signal for W-wind' cdfs_w.units = 'unitless' srh1 = fid.createVariable('srh_1km', 'f4', ('time',)) srh1.long_name = '0-1 km storm relative helicity' srh1.units = 'm^2/s^2' srh3 = fid.createVariable('srh_3km', 'f4', ('time',)) srh3.long_name = '0-3 km storm relative helicity' srh3.units = 'm^2/s^2' sigma_srh1 = fid.createVariable('sigma_srh_1km', 'f4', ('time',)) sigma_srh1.long_name = '1-sigma uncertainty for 0-1 km storm relative helicity' sigma_srh1.units = 'm^2/s^2' sigma_srh3 = fid.createVariable('sigma_srh_3km', 'f4', ('time',)) sigma_srh3.long_name = '1-sigma uncertainty for 0-3 km storm relative helicity' sigma_srh3.units = 'm^2/s^2' obs_max = fid.createVariable('obs_hgt_max', 'f4', ('time',)) obs_max.long_name = 'Height of highest observation' obs_max.units = 'km' obs_min = fid.createVariable('obs_hgt_min', 'f4', ('time',)) obs_min.long_name = 'Height of lowest observation' obs_min.units = 'km' for i in xret['max_heights']: if xret['max_heights'][i] != -999: max_height = fid.createVariable(f'{i}_hgt_max', 'f4', ('time',)) max_height.long_name = f'Maximum height of {i} data' max_height.units = 'km' if vip['raw_lidar_number'] > 0: rdim = fid.createDimension('raw_dim', vip['raw_lidar_number']) raw_timedelta = fid.createVariable('raw_lidar_timedelta', 'f4', ('raw_dim',)) raw_timedelta.long_name = 'Time window for raw lidar data to be included in retrieval for each data source' raw_timedelta.units = 'min' raw_type = fid.createVariable('raw_lidar_type', 'i2', ('raw_dim',)) raw_type.long_name = 'Type of raw lidar data used' raw_type.comment1 = '1 - CLAMPS Halo data' raw_type.comment2 = '2 - Windcube 200s lidar' if vip['proc_lidar_number'] > 0: pdim = fid.createDimension('proc_dim', vip['proc_lidar_number']) proc_timedelta = fid.createVariable('proc_lidar_timedelta', 'f4', ('proc_dim',)) proc_timedelta.long_name = 'Time window for processed lidar data to be included in retrieval for each data source' proc_timedelta.units = 'min' proc_type = fid.createVariable('proc_lidar_type', 'i2', ('proc_dim',)) proc_type.long_name = 'Type of processed lidar data used' proc_type.comment1 = '1 - CLAMPS VAD' proc_type.comment2 = '2 - ARM/NCAR VAD' if vip['cons_profiler_number'] > 0: pdim = fid.createDimension('cons_dim', vip['cons_profiler_number']) profiler_timedelta = fid.createVariable('wind_profiler_timedelta', 'f4',('cons_dim',)) profiler_timedelta.long_name = 'Time window for wind profiler data to be included in the retrieval' profiler_timedelta.units = 'min' prof_type = fid.createVariable('wind_profiler_type', 'i2', ('cons_dim',)) prof_type.long_name = 'Type wind profiler data used' prof_type.comment1 = '1 - NCAR 449 Mhz Profiler' prof_type.comment2 = '2 - NOAA 915 Mhz Profiler high-res' prof_type.comment2 = '3 - NOAA 915 Mhz Profiler low-res' # B. Adler: count how many observations per type are used at each time stamp obsflag_dim = fid.createDimension('obsflag_dim',None) obscount_flag = fid.createVariable('obscount_flag','i2',('time','obsflag_dim',)) obscount_flag.long_name = 'Number of different observation types per profile' obscount_flag.comment1 = 'Counts how many observations of a certain platform are used per profile' obsunique_flag = fid.createVariable('obsunique_flag','i2',('time','obsflag_dim',)) obsunique_flag.long_name = 'Unique observations per profile' obsunique_flag.comment1 = 'Flags of unique observations per profile' if vip['keep_file_small'] == 0: nht2 = fid.createDimension('nht2', (nht * 2)) cov = fid.createVariable('cov', 'f4', ('time', 'nht2', 'nht2')) cov.long_name = 'Covariance matrix' # B. Adler: in addition to covariance matrix, save observations from individual inputs # save observation vector, etc, if it is not lidar raw data # obsvecidx = np.where(xret['flagY']>1)[0] # save observation vector for all data, can get very large if not averaged obsvecidx = np.where(xret['flagY']>0)[0] if len(obsvecidx)>0: obs_dim = fid.createDimension('obs_dim',None) #obs_dim = fid.createDimension('obs_dim',len(obsvecidx)) obs_vector = fid.createVariable('obs_vector','f4',('time','obs_dim')) obs_vector.long_name = 'Observation vector Y' obs_vector.comment1 = 'mixed units -- see obs_flag field above' obs_flag = fid.createVariable('obs_flag', 'i2', ('time','obs_dim',)) obs_flag.long_name = 'Flag indicating type of observation for each vector element' obs_flag.comment1 = 'unitless' obs_dimension = fid.createVariable('obs_dimension', 'f8', ('time','obs_dim',)) obs_dimension.long_name = 'Dimension of the observation vector' obs_dimension.comment1 = 'mixed units -- see obs_flag field above' obs_vector_uncertainty = fid.createVariable('obs_vector_uncertainty', 'f4', ('time','obs_dim',)) obs_vector_uncertainty.long_name = '1-sigma uncertainty in the observation vector (sigY)' obs_vector_uncertainty.comment1 = 'mixed units -- see obs_flag field above' forward_calc = fid.createVariable('forward_calc', 'f4', ('time','obs_dim',)) forward_calc.long_name = 'Forward calculation from state vector (i.e., F(Xn))' forward_calc.comment1 = 'mixed units -- see obs_flag field above' # for prior informatoin arb_dim1 = fid.createDimension('arb_dim1',None) Xa = fid.createVariable('Xa', 'f8', ('arb_dim1',)) Xa.long_name = 'Prior mean state' Xa.units = 'm/s' Sa = fid.createVariable('Sa', 'f8', ('arb_dim1','arb_dim1',)) Sa.long_name = 'Prior covariance' Sa.units = 'm2/s2' # These should be the last three variables in the file lat = fid.createVariable('lat', 'f4') lat.long_name = 'latitude' lat.units = 'degrees north' lon = fid.createVariable('lon', 'f4') lon.long_name = 'longitude' lon.units = 'degrees east' alt = fid.createVariable('alt', 'f4') alt.long_name = 'altitude' alt.units = 'm above MSL' # Add some global attributes for i in range(len(list(globatt.keys()))): fid.setncattr(list(globatt.keys())[i], globatt[list(globatt.keys())[i]]) fid.Prior_dataset_comment = prior['comment'] fid.Prior_dataset_filename = prior['filename'] fid.Prior_dataset_number_profiles = prior['nsonde'] fid.Number_raw_lidar_sources = vip['raw_lidar_number'] fid.Number_proc_lidar_sources = vip['proc_lidar_number'] fid.shour = shour if vip['cons_profiler_number'] > 0: fid.Number_wind_profiler_sources = vip['cons_profiler_number'] else: fid.Number_wind_profiler_sources = 0 fid.Total_clock_execution_time_in_s = exectime # Add some of the static (non-time-dependent) data base_time[:] = xret['secs'] height[:] = xret['z'] if vip['raw_lidar_number'] > 0: raw_timedelta = np.array(vip['raw_lidar_timedelta']) raw_type = np.array(vip['raw_lidar_type']) if vip['proc_lidar_number'] > 0: proc_timedelta = np.array(vip['proc_lidar_timedelta']) proc_type = np.array(vip['proc_lidar_type']) if vip['cons_profiler_number'] > 0: profiler_timedelta = np.array(vip['cons_profiler_timedelta']) prof_type = np.array(vip['cons_profiler_type']) Xa[:] = prior['Xa'] Sa[:,:] = prior['Sa'] lat[:] = vip['station_lat'] lon[:] = vip['station_lon'] alt[:] = vip['station_alt'] fid.close() # Now append the sample from xret into the file nht = len(xret['z']) sig = np.sqrt(np.diag(xret['Sop'])) if verbose >= 3: print('Appending data to ' + nfilename) fid = Dataset(nfilename, 'a') fid.Total_clock_execution_time_in_s = str(exectime) time_offset = fid.variables['time_offset'] hour = fid.variables['hour'] qc_flag = fid.variables['qc_flags'] u_wind = fid.variables['u_wind'] v_wind = fid.variables['v_wind'] w_wind = fid.variables['w_wind'] sigmaU = fid.variables['sigma_u'] sigmaV = fid.variables['sigma_v'] sigmaW = fid.variables['sigma_w'] converged_flag = fid.variables['converged_flag'] gamma = fid.variables['gamma'] n_iter = fid.variables['n_iter'] rmsa = fid.variables['rmsa'] rmsp = fid.variables['rmsp'] chi2 = fid.variables['chi2'] convergence_criteria = fid.variables['convergence_criteria'] dfs = fid.variables['dfs'] sic = fid.variables['sic'] vres_u = fid.variables['vres_U'] vres_v = fid.variables['vres_V'] vres_w = fid.variables['vres_W'] cdfs_u = fid.variables['cdfs_U'] cdfs_v = fid.variables['cdfs_V'] cdfs_w = fid.variables['cdfs_W'] srh1 = fid.variables['srh_1km'] srh3 = fid.variables['srh_3km'] sigma_srh1 = fid.variables['sigma_srh_1km'] sigma_srh3 = fid.variables['sigma_srh_3km'] obs_max = fid.variables['obs_hgt_max'] obs_min = fid.variables['obs_hgt_min'] basetime = fid.variables['base_time'][:] time_offset[fsample] = xret['secs'] - basetime hour[fsample] = xret['hour'] qc_flag[fsample] = xret['qcflag'] did = np.where(np.array(list(fid.dimensions.keys())) == 'height')[0] if len(did) == 0: print('Whoaa -- this should not happen -- aborting') return success, nfilename if fid.dimensions['height'].size != len(xret['z']): print('Whoaa -- this should not happen size -- aborting') return success, nfilename u_wind[fsample, :] = xret['Xn'][0:nht] v_wind[fsample, :] = xret['Xn'][nht:2*nht] w_wind[fsample, :] = xret['Xn'][2*nht:3*nht] sigmaU[fsample, :] = sig[0:nht] sigmaV[fsample, :] = sig[nht:2*nht] sigmaW[fsample, :] = sig[2*nht:3*nht] converged_flag[fsample] = xret['converged'] gamma[fsample] = xret['gamma'] n_iter[fsample] = xret['niter'] rmsa[fsample] = xret['rmsa'] rmsp[fsample] = xret['rmsp'] chi2[fsample] = xret['chi2'] convergence_criteria[fsample] = xret['di2n'] dfs[fsample] = xret['dfs'] sic[fsample] = xret['sic'] vres_u[fsample,:] = xret['vres'][0,:] vres_v[fsample,:] = xret['vres'][1,:] vres_w[fsample,:] = xret['vres'][2,:] cdfs_u[fsample,:] = xret['cdfs'][0,:] cdfs_v[fsample,:] = xret['cdfs'][1,:] cdfs_w[fsample,:] = xret['cdfs'][2,:] srh1[fsample] = dindices['indices'][0] srh3[fsample] = dindices['indices'][1] sigma_srh1[fsample] = dindices['sigma_indices'][0] sigma_srh3[fsample] = dindices['sigma_indices'][1] foo = np.where((((xret['flagY'] < 8) | (xret['flagY'] > 11)) & (xret['sigY'] < 50)))[0] obs_max[fsample] = np.nanmax(xret['dimY'][foo]) obs_min[fsample] = np.nanmin(xret['dimY'][foo]) for i in xret['max_heights']: if xret['max_heights'][i] != -999: max_hgt = fid.variables[f'{i}_hgt_max'] max_hgt[fsample] = xret['max_heights'][i] # B. Adler: save how many obs per type were used c = Counter(xret['flagY']) obsflagunique = np.unique(xret['flagY']) obsflagcount = np.full((len(obsflagunique)),np.nan) for i in range(len(obsflagunique)): obsflagcount[i]=c[obsflagunique[i]] obscount_flag = fid.variables['obscount_flag'] obscount_flag[fsample,:] = obsflagcount obsunique_flag = fid.variables['obsunique_flag'] obsunique_flag[fsample,:] = obsflagunique # only add the covariance matrix if specified if vip['keep_file_small'] == 0: cov = fid.variables['cov'] cov[fsample,:,:] = xret['Sop'][:2*nht,:2*nht] # savee observations from each input if all(vip['raw_lidar_average_rv']) == 1: # for all obs obsvecidx = np.where(xret['flagY']>0)[0] else: # save observation vector and unertainty, for everything but raw lidar data # obsvecidx = np.where(xret['flagY']>1)[0] # save observation vector for all data, can get very large if not averaged obsvecidx = np.where(xret['flagY']>0)[0] if len(obsvecidx)>0: obs_vector = fid.variables['obs_vector'] obs_vector[fsample,:] = xret['Y'][obsvecidx] obs_vector_uncertainty = fid.variables['obs_vector_uncertainty'] obs_vector_uncertainty[fsample,:] = xret['sigY'][obsvecidx] obs_flag = fid.variables['obs_flag'] obs_flag[fsample,:] = xret['flagY'][obsvecidx] obs_dimension = fid.variables['obs_dimension'] obs_dimension[fsample,:] = xret['dimY'][obsvecidx] forward_calc = fid.variables['forward_calc'] forward_calc[fsample,:] = xret['FXn'][obsvecidx] fid.close() success = 1 return success, nfilename def find_last_time(date, vip, z, shour): # Find all of the output files with this date files = [] filename = vip['output_path'] + '/' + vip['output_rootname'] + '.' + str(date) + '.*.nc' files = files + (glob.glob(filename)) # If none are found, then just run the code as normal if len(files) == 0: print('The flag output_clober was set to 2 for append, but no prior file was found') print(' so code will run as normal') nfilename = ' ' return 0, -999., nfilename # Otherwise, let's initialize from the last file nfilename = files[-1] fid = Dataset(nfilename,'r+') # if the shour is not the same, create a new file if fid.shour != shour: # close the previously opened file fid.close() print('The flag output_clober was set to 2 for append, but no prior file was found') print(' so code will run as normal') nfilename = ' ' return 0, -999., nfilename # otherwise, continue initializing from the last time bt = fid.variables['base_time'][:] to = fid.variables['time_offset'][:] xz = fid.variables['height'][:] fid.close() secs = bt+to # A couple very basic checks to makes sure some of the variables in the output # file are the same as the current ones diff = np.abs(z-xz) foo = np.where(diff > 0.001)[0] if ((len(xz) != len(z)) or (len(foo) > 0)): print('Error: output_clobber is set to 2 (append), but there is a mismatch in heights') return -999, -999, nfilename # if the user wants the covariance matrix, but is appending to a file that didn't previously have the variable, exit if 'cov' not in fid.variables.keys() and vip['keep_file_small'] == 0: print('Error: output clobber is set to 2 (append), but there is no covariance matrix data for the previous timestamps') return -999, -999, nfilename # Return the fsample, last of the secs array, and nfilename fsample = len(secs) return fsample, secs[-1], nfilename