""" python plot_fitted_forecast_precip_cdfs_spline.py cmonth clead clat clon where cmonth = 01 to 12 clead = 006 to 240 every 006 clat = latitude clon = longitude, e.g., -100.0 this will plot both the forecast CDF and the cumulative hazard function. """ import os, sys from datetime import datetime import numpy as np import numpy.ma as ma import _pickle as cPickle from netCDF4 import Dataset import matplotlib.pyplot as plt from matplotlib import rcParams from mpl_toolkits.basemap import Basemap, interp from mpl_toolkits.axes_grid1 import make_axes_locatable from scipy.interpolate import LSQUnivariateSpline, splrep, splev import _pickle as cPickle import scipy.stats as stats rcParams['xtick.labelsize']='medium' rcParams['ytick.labelsize']='medium' rcParams['legend.fontsize']='medium' # ===================================================================== def set_domain_boundaries(cdomain): """ used grib file of 2.5-km blend output grid to determine bounding lat and lon, and from that, the domain bounding indices for the 0.25 GEFSv12 reforecast data that will encompass the domain. """ if cdomain == 'conus': jmin = 93 jmax = 246 imin = 368 imax = 686 elif cdomain == 'pr': jmin = 243 jmax = 256 imin = 649 imax = 667 elif cdomain == 'ak': jmin = 19 jmax = 161 imin = 201 imax = 967 else: print ('invalid domain. Exiting.') sys.exit() return jmin, jmax, imin, imax # ===================================================================== def find_nearest(vec, value): """ given a vector vec and a particular value, find the index in vec that is nearest to value""" idx = np.abs(vec-value).argmin() return idx # ===================================================================== def get_surrounding_months(cmonth): if cmonth == 'Jan': cmonth_middle = 'Jan' cmonth_early = 'Dec' cmonth_late = 'Feb' elif cmonth == 'Feb': cmonth_middle = 'Feb' cmonth_early = 'Jan' cmonth_late = 'Mar' elif cmonth == 'Mar': cmonth_middle = 'Mar' cmonth_early = 'Feb' cmonth_late = 'Apr' elif cmonth == 'Apr': cmonth_middle = 'Apr' cmonth_early = 'Mar' cmonth_late = 'May' elif cmonth == 'May': cmonth_middle = 'May' cmonth_early = 'Apr' cmonth_late = 'Jun' elif cmonth == 'Jun': cmonth_middle = 'Jun' cmonth_early = 'May' cmonth_late = 'Jul' elif cmonth == 'Jul': cmonth_middle = 'Jul' cmonth_early = 'Jun' cmonth_late = 'Aug' elif cmonth == 'Aug': cmonth_middle = 'Aug' cmonth_early = 'Jul' cmonth_late = 'Sep' elif cmonth == 'Sep': cmonth_middle = 'Sep' cmonth_early = 'Aug' cmonth_late = 'Oct' elif cmonth == 'Oct': cmonth_middle = 'Oct' cmonth_early = 'Sep' cmonth_late = 'Nov' elif cmonth == 'Nov': cmonth_middle = 'Nov' cmonth_early = 'Oct' cmonth_late = 'Dec' elif cmonth == 'Dec': cmonth_middle = 'Dec' cmonth_early = 'Nov' cmonth_late = 'Jan' else: print ('invalid month') sys.exit() return cmonth_middle, cmonth_early, cmonth_late # ===================================================================== def fraczero_possamps(nsamps, precip_ens): """ from the vector input sample precip_ens, define the fraction of samples with zero precipitation. For the positive samples, add a small random number to deal with the fact that the data was discretized to ~0.1 mm, so that when later creating CDFs we don't have empirical values with lots of tied amounts. Also, sort the nonzero amounts and return. """ number_zeros = 0 precip_ens_nonzero = np.delete(precip_ens, \ np.where(precip_ens <= 0.0)) # censor at 0.0 mm precip_ens_nonzero = precip_ens_nonzero + \ np.random.uniform(low=-0.1,high=0.1,size=len(precip_ens_nonzero)) precip_ens_nonzero = np.delete(precip_ens_nonzero, \ np.where(precip_ens_nonzero <= 0.0)) # censor at 0.0 mm nz = len(precip_ens_nonzero) precip_ens_nonzero = np.sort(precip_ens_nonzero) ntotal = len(precip_ens) nzero = ntotal - len(precip_ens_nonzero) fraction_zero = float(nzero) / float(ntotal) return fraction_zero, precip_ens_nonzero, nz # ===================================================================== def read_forecast_spline_info(ccmonth, \ master_directory_forecast_spline): """ load spline information for forecast netCDF file. For initial times other than 00 UTC, we need to find the appropriate spline file to read in based on 00 UTC forecast initialization times. Assuming that the CDF characteristics are primarily a function of the diurnal cycle, make this adjustment. """ ccycle = '00' icycle = int(ccycle) ilead = int(clead) ilead_use = icycle + ilead # diurnal cycle adjust if ilead_use > 240: ilead_use = ilead_use - 24 if ilead_use < 10: # make 3-character variable of ilead_use clead_use = '00' + str(ilead_use) elif ilead_use >= 10 and ilead_use < 100: clead_use = '0' + str(ilead_use) else: clead_use = str(ilead_use) infile = master_directory_forecast_spline + \ ccmonth + '_conus_GEFSv12_spline_info_h' + clead_use + '.nc' print ('reading forecast spline information from ', infile) nc = Dataset(infile) lons_spline_gefsv12_1d = nc.variables['lons'][:] lats_spline_gefsv12_1d = nc.variables['lats'][:] spline_info_gefsv12 = nc.variables['spline_info'][:,:,:,:] fraction_zero_gefsv12 = nc.variables['fzero'][:,:] usegamma_gefsv12 = nc.variables['usegamma'][:,:] quantile_99_gefsv12 = nc.variables['quantile_99'][:,:] number_knots_gefsv12 = nc.variables['number_knots'][:,:] ny_gefsv12, nx_gefsv12 = np.shape(usegamma_gefsv12) nc.close() lons_fcst_2d, lats_fcst_2d = \ np.meshgrid(lons_spline_gefsv12_1d,lats_spline_gefsv12_1d) return lons_spline_gefsv12_1d, lats_spline_gefsv12_1d, \ spline_info_gefsv12, fraction_zero_gefsv12, \ usegamma_gefsv12, quantile_99_gefsv12, \ number_knots_gefsv12, ny_gefsv12, nx_gefsv12, \ lons_fcst_2d, lats_fcst_2d, clead_use # ===================================================================== # ---- inputs from command line cmonth = sys.argv[1] # 'Jan', 'Feb', etc. clead = sys.argv[2] # 006, 012, 018, 024, etc. cleade = clead clat = sys.argv[3] clon = sys.argv[4] ileadb = int(clead)-6 if ileadb < 10: cleadb = '00'+str(ileadb) elif ileadb >= 10 and ileadb < 100: cleadb = '0'+str(ileadb) else: cleadb = str(ileadb) # ---- define constants. rlon_input = float(clon) rlat_input = float(clat) cdomain = 'conus' jmin, jmax, imin, imax = set_domain_boundaries(cdomain) pflag = True ndaysomo = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] ndaysomo_leap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11'] cmonths_late = ['02','03','04','05','06','07','08','09','10','11','12','01'] cmonths_middle = ['01','02','03','04','05','06','07','08','09','10','11','12'] cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] yearstart = 2002 # CCPA only available starting 2002 yearend = 2020 # companion reforecasts end at end of 2019 cmonth_middle, cmonth_early, cmonth_late = get_surrounding_months(cmonth) imonth = cmonths.index(cmonth) # --- load forecast spline info. master_directory_forecast_spline = \ '/Volumes/NBM/conus_gefsv12/CDF_spline/' # where forecast spline info is stored master_directory = '/Volumes/NBM/'+\ cdomain+'_gefsv12/precip/netcdf/' # ---- read forecast spline parameters from netCDF file print ('reading forecast spline parameters') lons_1d_realtime, lats_1d_realtime, \ spline_info_gefsv12, fraction_zero_gefsv12, \ usegamma_gefsv12, quantile_99_gefsv12, \ number_knots_gefsv12, ny_gefsv12, nx_gefsv12, \ lons_fcst_2d, lats_fcst_2d, clead_use = \ read_forecast_spline_info( \ cmonth, master_directory_forecast_spline) # --- make sure longitudes are negative for west, and # create 2D arrays of lat/lon if np.max(lons_1d_realtime) > 180.: \ lons_1d_realtime = lons_1d_realtime - 180. lons_2d, lats_2d = \ np.meshgrid(lons_1d_realtime, lats_1d_realtime) # ---- find index of nearest grid point for spline data print ('rlon_input, rlat_input = ', rlon_input, rlat_input) distx = np.abs(lons_2d - rlon_input) disty = np.abs(lats_2d - rlat_input) dist = distx**2 + disty**2 i = np.argmin(dist) print (i) jspline, ispline = np.unravel_index\ (i, shape=(ny_gefsv12, nx_gefsv12), order='C') # ---- read in the previously generated netCDF file with precipitation # for this month and lead time + surrounding months. # All members, dates for this month have been # smushed into one leading index, dimension # nsamps, since the date of the forecast within the month and # the member number is irrelevant for the distribution fitting. ncfile = master_directory + cmonth_middle + \ '_apcp_sfc_h' + clead + '.nc' print (ncfile) nc = Dataset(ncfile) lons_fcst = nc.variables['lons_fcst'][:] lats_fcst = nc.variables['lats_fcst'][:] lons_2d_fcst, lats_2d_fcst = np.meshgrid(lons_fcst, lats_fcst) nxgefs = len(lons_fcst) nygefs = len(lats_fcst) # ---- find index of nearest grid point for spline data distx = np.abs(lons_2d_fcst - rlon_input) disty = np.abs(lats_2d_fcst - rlat_input) dist = distx**2 + disty**2 i = np.argmin(dist) jgefs, igefs = np.unravel_index(i, shape=(nygefs, nxgefs), order='C') precip_middle = nc.variables['apcp_fcst'][:,jgefs, igefs] nsamps_middle = len(precip_middle) nc.close() ncfile = master_directory + cmonth_early + '_apcp_sfc_h' + clead + '.nc' print (ncfile) nc = Dataset(ncfile) precip_early = nc.variables['apcp_fcst'][:,jgefs, igefs] nsamps_early = len(precip_early) nc.close() ncfile = master_directory + cmonth_late + '_apcp_sfc_h' + clead + '.nc' print (ncfile) nc = Dataset(ncfile) precip_late = nc.variables['apcp_fcst'][:,jgefs, igefs] nsamps_late = len(precip_late) nc.close() # --- form a vector for this grid point of all 3 months of data nsamps = nsamps_middle + nsamps_early + nsamps_late precip_tseries = np.concatenate((precip_middle, \ precip_early, precip_late)) tp = np.min( [np.abs(np.min(precip_tseries)), 0.0] ) teeny_precip = tp*np.ones(nsamps) precip_tseries = precip_tseries - teeny_precip[:] # ---- determine the fraction zero, the number of nonzeros (nz) # and the sorted sample of nonzero values (precip_ens_nonzero) fraction_zero, precip_ens_nonzero, nz = \ fraczero_possamps(nsamps, precip_tseries) # return sorted # ---- build empirical CDF for this grid point's nonzero data x = np.arange(0.0,50.01,0.1) nx = len(x) len_nonzero = len(precip_ens_nonzero) fnzero = float(len_nonzero) nz = int(fnzero) print ('nz = ', nz) empirical_cdf = 1.0 / (2.0*nz) + \ np.arange(nz)/nz # "Hazen" plotting function empirical_cdf_with_zeros = \ fraction_zero + (1.-fraction_zero)*empirical_cdf empirical_hazard_fn = -np.log(1.0 - empirical_cdf) # ---- get the spline fit. numknots = number_knots_gefsv12[jspline, ispline] # now this number includes boundary knots_in = spline_info_gefsv12[jspline, ispline,0,0:numknots] bspline_coef_in = spline_info_gefsv12[jspline, ispline,1,0:numknots] precip_regular = 0.0001+np.arange(0.,401.,1.)/10. npr = len(precip_regular) nthree = 3 ier = 0 cdf_spline = np.zeros((nz), dtype=np.float64) splines_tuple = (spline_info_gefsv12[jspline, ispline,0,0:numknots], \ spline_info_gefsv12[jspline, ispline,1,0:numknots], 3) spline_hazard = splev(precip_regular, splines_tuple) qpositive = 1.0 - np.exp(-spline_hazard) cdf_spline = qpositive hazard_fn = -np.log(1.0 - cdf_spline) interior_knots_precip = knots_in[4:13] cdf_at_knots = np.zeros((9), dtype=np.float32) haz_at_knots = np.zeros((9), dtype=np.float32) precip_at_knots = interior_knots_precip for i in range(9): idx = find_nearest(precip_regular,interior_knots_precip[i]) haz_at_knots[i] = hazard_fn[idx] cdf_at_knots[i] = cdf_spline[idx] cdf_spline_with_zeros = fraction_zero + (1.-fraction_zero)*qpositive # ---- plot the CDFs, forecast and analyzed, empirical and best fitted. f = plt.figure(figsize=(6.5,6.5))# ax = f.add_axes([.13,.14,.84,.75]) ax.set_title('Spline fit to hazard function,\n'+\ cmonth+', '+clead+' h, 00 UTC IC, '+\ clon+' W '+clat+' N',fontsize=13) ax.plot(precip_ens_nonzero, empirical_hazard_fn,\ color='Red',lw=2,label='Empirical') ax.plot(precip_regular, spline_hazard,\ color='Blue',lw=2,label='Cubic spline') plt.ylabel('Cumulative hazard function',fontsize=11) ax.legend(loc=0) ax.set_ylim(0.0,10.0) plt.grid(True,lw=0.25,color='LightGray') ax.set_xlim(0,25) #ax.set_xlim(0,5) ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=11) figname = 'hazard_precip_forecast_example_spline.png' plt.savefig(figname,dpi=400) print ('Plot done', figname) plt.close() # ---- plot the CDFs, forecast and analyzed, empirical and best fitted. f = plt.figure(figsize=(6.5,6.5))# ax = f.add_axes([.13,.14,.84,.75]) ax.set_title('Spline fit to positive forecast precipitation values,\n'+\ cmonth+', '+clead+' h, 00 UTC IC, '+\ clon+' W '+clat+' N',fontsize=13) ax.plot(precip_ens_nonzero, empirical_cdf_with_zeros,\ color='Red',lw=2,label='Empirical') ax.plot(precip_regular, cdf_spline_with_zeros,\ color='Blue',lw=2,label='Cubic spline') plt.ylabel('Non-exceedance probability',fontsize=11) ax.legend(loc=0) ax.set_ylim(0.0,1) plt.grid(True,lw=0.25,color='LightGray') ax.set_xlim(0,25) ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=11) figname = 'CDF_precip_forecast_example_spline.png' plt.savefig(figname,dpi=400) print ('Plot done', figname) plt.close() # ---- plot the CDFs, forecast and analyzed, empirical and best fitted. f = plt.figure(figsize=(8,4.3))# ax = f.add_axes([.07,.14,.41,.75]) ax.set_title('(a) Forecast cumulative hazard function',fontsize=12.5) ax.plot(precip_ens_nonzero, empirical_hazard_fn,'--',\ color='Red',lw=2,label='Empirical') ax.plot(precip_regular, spline_hazard,\ color='RoyalBlue',lw=2,label='Cubic spline') ax.plot(precip_at_knots, haz_at_knots, 'k.') plt.ylabel('Cumulative hazard function',fontsize=9) ax.legend(loc=0) ax.set_ylim(0.0,10.0) plt.grid(True,lw=0.25,color='LightGray') ax.set_xlim(0,40) ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=9) ax = f.add_axes([.57,.14,.41,.75]) ax.set_title('(b) Forecast CDF',fontsize=12.5) ax.plot(precip_ens_nonzero, empirical_cdf,\ color='Red',lw=1.6,label='Empirical') ax.plot(precip_regular, cdf_spline,\ color='Blue',lw=1.6,label='Cubic spline') ax.plot(precip_at_knots, cdf_at_knots, 'k.') plt.ylabel('Non-exceedance probability',fontsize=9) ax.legend(loc=0) ax.set_ylim(0.0,1) plt.grid(True,lw=0.25,color='LightGray') ax.set_xlim(0,40) ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=9) figname = 'CDF_precip_forecast_example_spline_ITH.png' plt.savefig(figname,dpi=400) print ('Plot done', figname) plt.close() outfile = 'forecast_data_cdf.cPick' ouf = open(outfile, 'wb') cPickle.dump(precip_ens_nonzero, ouf) cPickle.dump(empirical_hazard_fn, ouf) cPickle.dump(spline_hazard, ouf) cPickle.dump(empirical_cdf, ouf) cPickle.dump(precip_regular, ouf) cPickle.dump(cdf_spline, ouf) cPickle.dump(precip_at_knots, ouf) cPickle.dump(haz_at_knots, ouf) cPickle.dump(cdf_at_knots, ouf) ouf.close()