""" estimate_distribution_parameters.py estimate, for each month of the year, parameters of fitted distributions for forecast and analyzed """ import pygrib from dateutils import daterange, dateshift, dayofyear, splitdate import os, sys from datetime import datetime import _pickle as cPickle import numpy as np import numpy.ma as ma import scipy.signal as signal import scipy.stats as stats from astropy.convolution import convolve from det_params import det_params # -------------------------------------------------------------- # ---- various initialization clead = sys.argv[1] iskip = int(clead)//24 cvariable = '2t' cpath_era5 = '/Volumes/Backup Plus/ecmwf/' cpath_gefsv12 = '/Volumes/Backup Plus/gefsv12/t2m/' date_end = dateshift('2018123100',-2*int(clead)) date_list_anal = daterange('2000010100',date_end,24) ndates = len(date_list_anal) date_list_forecast = [] for i in range(ndates): date_list_forecast.append(dateshift(date_list_anal[i], int(clead))) date_list_anal_ver = [] for i in range(ndates): date_list_anal_ver.append(dateshift(date_list_forecast[i], int(clead))) # ---- loop through dates and process day by day ndatektr = 0 ndatektr_yearly = np.zeros((18), dtype=np.int32) for idate, date in enumerate(date_list_anal): # ---- read reanalysis appropriate at the time. rem = idate%30 if rem == 0: print ('processing date = ', date) cyear = date[0:4] cmm = date[4:6] cmmdd = date[4:8] imm = int(cmm) iyear = int(cyear)-2000 datef = date_list_forecast[idate] cyearf = datef[0:4] infile = cpath_era5 +cyearf+'/t2m_era5_halfdegree_'+datef+'.cPick' fexist1 = os.path.exists(infile) if fexist1 == True: inf = open(infile, 'rb') analysis = cPickle.load(inf) - 273.16 analysis = np.flipud(analysis) if idate == 0: lats = cPickle.load(inf) lons = cPickle.load(inf) nlats, nlons = np.shape(lats) lats = np.flipud(lats) forecast_3d = np.zeros((ndates,nlats,nlons), dtype=np.float64) analysis_3d = np.zeros((ndates,nlats,nlons), dtype=np.float64) else: print ('1. did not find file ', infile) # ---- read the forecast information . cyear = date[0:4] cpath_forecast = cpath_gefsv12+cyear+'/' infile = cpath_forecast + date + '_lead'+clead+'_conus_0.5deg_hour'+clead+'.cPick' fexist2 = os.path.exists(infile) if fexist2 == True: inf = open(infile,'rb') forecast = cPickle.load(inf) inf.close() if cmmdd == '0101': infile = '/Volumes/Backup Plus/gefsv12/t2m/gefsv12_latlon_subset.cPick' inf = open(infile,'rb') latsf = cPickle.load(inf) lonsf = cPickle.load(inf) inf.close() else: print ('2. did not find file ', infile) # ---- compute the differences between the forecast and analysis, add to time series forecast_3d[idate,:,:] = forecast[:,:] analysis_3d[idate,:,:] = analysis[:,:] # ---- now estimate for each month and each grid point the distribution parameters. 
# ---- now estimate, for each month and each grid point, the distribution parameters.

power_analysis = np.zeros((12,nlats,nlons), dtype=np.float64)
mean_analysis = np.zeros((12,nlats,nlons), dtype=np.float64)
stddev_analysis = np.zeros((12,nlats,nlons), dtype=np.float64)
power_forecast = np.zeros((12,nlats,nlons), dtype=np.float64)
mean_forecast = np.zeros((12,nlats,nlons), dtype=np.float64)
stddev_forecast = np.zeros((12,nlats,nlons), dtype=np.float64)

for imonth in range(12):

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print('**** PROCESSING month = ', imonth, current_time)

    # ---- flag the dates that contribute samples to this month's fit:
    #      the month itself plus the neighboring month on either side,
    #      wrapping around the ends of the year.

    datevalid_indices = np.zeros(ndates, dtype=np.int32)
    for idate, date in enumerate(date_list_anal):
        thismonth = int(str(date)[4:6]) - 1
        if imonth == 0:
            if thismonth == imonth or thismonth == 11 \
                or thismonth == imonth+1:
                datevalid_indices[idate] = 1
        elif imonth == 11:
            if thismonth == imonth or thismonth == 0 \
                or thismonth == imonth-1:
                datevalid_indices[idate] = 1
        else:
            if thismonth == imonth or thismonth == imonth+1 \
                or thismonth == imonth-1:
                datevalid_indices[idate] = 1

    ndates_valid = np.sum(datevalid_indices)
    forecast_validdates = np.zeros((ndates_valid, nlats, nlons), dtype=np.float32)
    analysis_validdates = np.zeros((ndates_valid, nlats, nlons), dtype=np.float32)

    ktr = 0
    for idate, date in enumerate(date_list_anal):
        if datevalid_indices[idate] == 1:
            forecast_validdates[ktr,:,:] = forecast_3d[idate,:,:]
            analysis_validdates[ktr,:,:] = analysis_3d[idate,:,:]
            ktr = ktr + 1

    # ---- call the routine that tests various power transformations,
    #      keeping the one that gives the best Lilliefors test.

    power_analysis[imonth,:,:], mean_analysis[imonth,:,:], \
        stddev_analysis[imonth,:,:] = det_params(ndates_valid, \
        nlats, nlons, analysis_validdates)
    power_forecast[imonth,:,:], mean_forecast[imonth,:,:], \
        stddev_forecast[imonth,:,:] = det_params(ndates_valid, \
        nlats, nlons, forecast_validdates)

# ---- save the optimal distribution parameters.

outfile = cpath_gefsv12 + 'GEFSv12_forecast_distribution_parameters_f' + clead + '.cPick'
ouf = open(outfile, 'wb')
cPickle.dump(power_forecast, ouf)
cPickle.dump(mean_forecast, ouf)
cPickle.dump(stddev_forecast, ouf)
ouf.close()

outfile = cpath_era5 + 'ERA5_analyzed_distribution_parameters_f' + clead + '.cPick'
ouf = open(outfile, 'wb')
cPickle.dump(power_analysis, ouf)
cPickle.dump(mean_analysis, ouf)
cPickle.dump(stddev_analysis, ouf)
ouf.close()
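# ---- for downstream scripts: the parameter files written above can be read
#      back with cPickle loads in the same order they were dumped. A minimal
#      sketch follows, assuming only the dump order used in this script; the
#      function name is illustrative and the function is not called here.

def read_distribution_parameters(parameter_file):
    """Load (power, mean, stddev), each of shape (12, nlats, nlons)."""
    inf = open(parameter_file, 'rb')
    power = cPickle.load(inf)
    mean = cPickle.load(inf)
    stddev = cPickle.load(inf)
    inf.close()
    return power, mean, stddev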