._Bbeta_model_4d.py                                                                                 000775  000765  000024  00000000412 14016263121 014331  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    
     32:760                                                                                                                                                                                                                                                      Bbeta_model_4d.py                                                                                   000775  000765  000024  00000002226 14016263121 014121  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def Bbeta_model_4d(nlats, nlons, ndates, npts, cyear, clead,\
    warm_or_cold, cpath_Bbeta, bias_corr_3d, already):
    
    """
    form a model for the bias-correction error covariance, 
    using a previously calculated correlation model based on 
    horizontal, vertical distance, land/water.   Covariances
    then formed using beta errors determined grid point by
    grid point from time series of previous decay avg approach
    """

    import numpy as np
    import os, sys
    import _pickle as cPickle

    # --- read correlation of bias to file

    cfile = 'correlation_bias_ERA5grid.cPick'
    inf = open(cfile, 'rb')
    correlation_bias = cPickle.load(inf)
    inf.close()

    # ---- get standard deviation across time dimension
    
    beta_stddev = np.std(bias_corr_3d, axis=0)
    Bbeta = np.zeros((nlats,nlons, nlats,nlons), dtype=np.float32)
    
    for i1 in range(nlons):
        for j1 in range(nlats):
            for i2 in range(nlons):
                for j2 in range(nlats):
                    Bbeta[j1,j2,i1,i2] = correlation_bias[j1,i1,j2,i2] # * beta_stddev[j1,i1]*beta_stddev[j2,i2]*\
                        
        
    return Bbeta                                                                                                                                                                                                                                                                                                                                                                          ._Bbeta_model_GEFSv12.py                                                                            000775  000765  000024  00000000415 14016263121 015102  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    "     125:72012                                                                                                                                                                                                                                                   Bbeta_model_GEFSv12.py                                                                              000775  000765  000024  00000010617 14016263121 014672  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
"""
Bbeta_model_GEFSv12.py
thin gridded bias time series data down to just the desired 3 months.  
form a model for the bias-correction error covariance, 
including covariance localization.   Save the resulting B_beta 
to file and return localized covariance.  If already, just read from file.
"""

#from fec_2D_4D_f90 import fec_2D_4D_f90a
import numpy as np
import os, sys
import _pickle as cPickle
from datetime import datetime

# ----------------------------------------------------------

def calculate_cov(x1, x2, n):
    """
    Sample covariance of two series, each holding n values.

    The means are taken as sum/n and the sum of the products of the
    deviations is divided by (n-1).
    """
    dev1 = x1 - np.sum(x1) / n
    dev2 = x2 - np.sum(x2) / n
    return np.sum(dev1 * dev2) / (n-1)
    
# ----------------------------------------------------------

# ---- command-line inputs and fixed settings

cseason = sys.argv[1]  # season name: 'JFM', 'AMJ', 'JAS' or 'OND'
clead = sys.argv[2]  # lead-time string, used verbatim in file names
cpath_beta = '/Volumes/Backup Plus/gefsv12/t2m/beta/'
efold = 800.0  # length scale that hdist is divided by in the localization
exponenty = 2.0  # exponent applied to hdist/efold in the localization
already = False  # set True to reload a previously saved result

if not already:

    # ---- translate the season into inclusive mmddhh bounds.  Checked
    #      first so a mistyped season fails before any file is read.

    season_bounds = {'JFM': (10100, 33100), 'AMJ': (40100, 63000), \
        'JAS': (70100, 93000), 'OND': (100100, 123100)}
    if cseason not in season_bounds:
        raise ValueError('invalid season '+cseason+\
            '; expected one of JFM, AMJ, JAS, OND')
    mmddhh_begin, mmddhh_end = season_bounds[cseason]

    # ---- read in lat/lons

    infile = '/Volumes/Backup Plus/gefsv12/t2m/gefsv12_latlon_subset.cPick'
    with open(infile,'rb') as inf:
        latsf = cPickle.load(inf)
        lonsf = cPickle.load(inf)
    nlats, nlons = np.shape(latsf)
    npts = nlats*nlons

    # ---- load the bias correction file

    bias_file = 'bias_correction_decayavg_lead'+clead+'.cPick'
    with open(bias_file, 'rb') as inf:
        bias_3d = cPickle.load(inf)
        date_list_anal = cPickle.load(inf)
    ndates = len(date_list_anal)

    # ---- make list of valid dates, i.e., those inside the season.
    #      NOTE(review): date[2:8] takes characters 2..7 of each date.
    #      If the dates are 'yyyymmddhh' strings that is yymmdd, not
    #      the mmddhh the bounds are expressed in (that would be
    #      date[4:10]) -- confirm the date format with the file writer.

    index_good = [idate for idate, date in enumerate(date_list_anal) \
        if mmddhh_begin <= int(date[2:8]) <= mmddhh_end]
    date_list_good = [date_list_anal[idate] for idate in index_good]
    ndatesub = len(date_list_good)
    if ndatesub < 2:
        # calculate_cov divides by ndatesub-1
        raise ValueError('need at least 2 dates in season '+cseason+\
            ', found '+str(ndatesub))

    bias_3d_season = np.zeros((ndatesub, nlats, nlons), dtype=np.float32)
    for isub, idate in enumerate(index_good):
        bias_3d_season[isub,:,:] = bias_3d[idate,:,:]

    # ---- compute localized covariances between every pair of grid
    #      points.  The 2-D matrix is indexed by a point counter that
    #      runs over longitude index (outer) and latitude index
    #      (inner); the 4-D array is indexed [j1,i1,j2,i2].
    #      (A Fortran routine, fec_2D_4D_f90a, formerly did this.)

    Bbeta_localized = np.zeros((npts,npts), dtype=np.float64)
    Bbeta_localized_4D = np.zeros((nlats,nlons,nlats,nlons), dtype=np.float64)

    ktr1 = 0
    for i1 in range(nlons):
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('processing ',i1, current_time)
        for j1 in range(nlats):
            x1 = bias_3d_season[:,j1,i1]
            ktr2 = 0
            for i2 in range(nlons):
                for j2 in range(nlats):
                    x2 = bias_3d_season[:,j2,i2]
                    # separation in grid units scaled by 111/2;
                    # presumably km on a 0.5-degree grid -- TODO confirm.
                    # Builtin float replaces np.float, which newer
                    # NumPy versions no longer provide.
                    hdist = (111./2.) * np.sqrt( float(i1-i2)**2 + float(j1-j2)**2)
                    localizn_factor = np.exp(-(hdist/efold)**exponenty)
                    covv = calculate_cov(x1,x2,ndatesub)
                    # every ordered pair is visited, so the mirrored
                    # element is filled on its own iteration.
                    Bbeta_localized[ktr1,ktr2] = covv*localizn_factor
                    Bbeta_localized_4D[j1,i1,j2,i2] = Bbeta_localized[ktr1,ktr2]
                    ktr2 = ktr2 + 1
            ktr1 = ktr1 + 1   

    # ---- write the 4-D and then the 2-D localized Bbeta to pickle file.

    outfile = cpath_beta+'Localized_Bbeta_'+cseason+\
            '_lead='+clead+'_'+str(efold)+'.cPick'
    print ('writing to ', outfile)
    with open(outfile,'wb') as ouf:
        cPickle.dump(Bbeta_localized_4D, ouf)
        cPickle.dump(Bbeta_localized, ouf)
else:

    # ---- read the previously saved 4-D and 2-D localized Bbeta.

    infile = cpath_beta+'Localized_Bbeta_'+cseason+\
        '_lead='+clead+'_'+str(efold)+'.cPick'
    with open(infile,'rb') as inf:
        Bbeta_localized_4D = cPickle.load(inf)
        Bbeta_localized = cPickle.load(inf)
print ('Done')                                                                                                                 ._Bbeta_model_v2.py                                                                                 000775  000765  000024  00000000412 14016263121 014351  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
def Bbeta_model_v2(nlats, nlons, ndates, npts, cyear, clead,\
    warm_or_cold, cpath_Bbeta, bias_corr_3d, already):
    
    """
    Form a 2-D model for the bias-correction error covariance.

    A previously calculated correlation model, indexed [j1,i1,j2,i2],
    is read from 'correlation_bias_ERA5grid.cPick' in the current
    working directory.  Each correlation is multiplied by the
    standard deviations (across the leading, time dimension of
    bias_corr_3d) at the two grid points involved.

    Rows and columns of the result are numbered by a grid-point
    counter that runs over the longitude index (outer) and the
    latitude index (inner), i.e. k = i*nlats + j.

    Parameters
    ----------
    nlats, nlons : int
        number of grid points in the two horizontal dimensions.
    bias_corr_3d : array, time dimension first, then [j, i]
        time series of the bias correction at every grid point.
    ndates, npts, cyear, clead, warm_or_cold, cpath_Bbeta, already :
        not used by this routine; kept so existing calls still work.

    Returns
    -------
    Bbeta : float32 array, shape (nlats*nlons, nlats*nlons)
    """

    import numpy as np
    import _pickle as cPickle

    # --- read correlation of bias from file

    cfile = 'correlation_bias_ERA5grid.cPick'
    with open(cfile, 'rb') as inf:
        correlation_bias = np.asarray(cPickle.load(inf))

    # ---- get standard deviation across time dimension

    beta_stddev = np.std(bias_corr_3d, axis=0)

    # ---- flatten with the longitude index varying slowest, so that
    #      element k corresponds to (i, j) = (k // nlats, k % nlats).

    npts_grid = nlats*nlons
    stddev_1d = beta_stddev[:nlats, :nlons].T.reshape(npts_grid)
    corr_2d = correlation_bias[:nlats, :nlons, :nlats, :nlons]\
        .transpose(1, 0, 3, 2).reshape(npts_grid, npts_grid)

    # ---- covariance = stddev(point 1) * stddev(point 2) * correlation

    Bbeta = (stddev_1d[:, np.newaxis]*stddev_1d[np.newaxis, :]*corr_2d)\
        .astype(np.float32)

    return Bbeta
                                      ATTR      
def Bxmodel(nlats, nlons, ndates, lats, lons, \
    cyear, clead, warm_or_cold, cpath_Bx, \
    efold, exponenty, forecast_3d, analyses_3d, \
    bias_corr_3d, already):
    
    """
    Form, or reload, a model for the (bias-corrected) forecast-error
    covariance, including covariance localization.

    When `already` equals False, the bias-corrected forecast error
    analyses_3d - (forecast_3d - bias_corr_3d) is passed to
    forecast_error_covariance_f90 and the resulting B_x is saved to
    a pickle file.  Otherwise B_x is read back from that same file.

    The file name is
        cpath_Bx + 'Localized_Bx_' + warm_or_cold + 'season_year' +
        cyear + '_lead=' + clead + '_efold' + str(efold) + '.cPick'

    Parameters
    ----------
    nlats, nlons, ndates : int
        grid and sample dimensions, passed on to the covariance routine.
    lats, lons :
        not used by this routine; kept so existing calls still work.
    cyear, clead, warm_or_cold : str
        used to build the file name.
    cpath_Bx : str
        directory prefix of the pickle file (concatenated as is, so
        it needs its trailing path separator).
    efold, exponenty :
        localization parameters passed on to the covariance routine;
        efold is also part of the file name.
    forecast_3d, analyses_3d, bias_corr_3d : arrays
        only used when the covariance is computed.
    already : bool
        False to compute and save, anything else to reload.

    Returns
    -------
    Bx_localized : whatever the covariance routine returned or the
        pickle file holds.
    """

    import _pickle as cPickle

    bx_file = cpath_Bx+'Localized_Bx_'+warm_or_cold+\
        'season_year'+cyear+'_lead='+clead+'_efold'+\
        str(efold)+'.cPick'

    # kept as an equality test so that, as before, any value other
    # than False/0 selects the read-from-file branch.
    if already == False:

        # ---- the compiled routine is imported only on this branch, so
        #      that reloading a saved B_x does not require it.

        from forecast_error_covariance_f90 import forecast_error_covariance_f90

        npts = int(nlats*nlons)

        # ---- bias_correct the forecast before comparing to the analyses

        difference_3d_biascorr = analyses_3d - (forecast_3d - bias_corr_3d)

        # ---- produce estimate of the localized covariance of
        #      bias-corrected forecast errors between grid points.

        Bx_localized = \
            forecast_error_covariance_f90(difference_3d_biascorr, efold, \
                exponenty, npts, ndates, nlats, nlons)

        # ---- write the Bx_localized to pickle file.

        print ('writing to ', bx_file)
        with open(bx_file,'wb') as ouf:
            cPickle.dump(Bx_localized, ouf)

    else:

        # ---- read the Bx_localized from pickle file.

        with open(bx_file,'rb') as inf:
            Bx_localized = cPickle.load(inf)

    return Bx_localized

    
                                                                                                                                                                 ._CDF_fitting_ccpa_precip_spline.py                                                                 000775  000765  000024  00000000471 14020175653 017603  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       9                                      ATTR      9     !                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0     com.macromates.selectionRange      8     com.macromates.visibleIndex  `    a     ( '95' )1:5-1:350                                                                                                                                                                                                       CDF_fitting_ccpa_precip_spline.py                                                                   000775  000765  000024  00000041733 14020175653 017374  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_fitting_ccpa_precip_spline(cmonth, cend_hour):

    """
    CDF_fitting_ccpa_precip_spline.py cmonth cend_hour

    this python script is designed to spline fit an empirical CDF of 
    precipitation. The script is tailored to CCPA precipitation analyses over 
    one of the National Digital Forecast Database domains for the National 
    Blend of Models

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Jan 2021

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    import scipy.stats as stats
    from scipy.interpolate import splrep, splev

    # =====================================================================

    def set_domain_boundaries(cdomain):
    
        """ return (jmin, jmax, imin, imax), the bounding indices on 
        the 0.25 GEFSv12 reforecast grid that encompass the named 
        domain ('conus', 'pr', or 'ak').  The values were determined 
        from the bounding lat and lon of a grib file of the 2.5-km 
        blend output grid.  Any other domain name prints a message 
        and exits.
        """
        
        # (domain name, (jmin, jmax, imin, imax))
        known_domains = (
            ('conus', (93, 246, 368, 686)),
            ('pr', (243, 256, 649, 667)),
            ('ak', (19, 161, 201, 967)),
        )
        for domain_name, bounds in known_domains:
            if cdomain == domain_name:
                return bounds

        print ('invalid domain.  Exiting.')
        sys.exit()
 
    # =====================================================================

    def find_nearest(vec, value):
    
        """ return the index of the element of the vector vec whose 
        absolute difference from value is smallest"""
    
        distance = np.abs(vec-value)
        return distance.argmin()

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):
    
        """
    
        split the vector input sample precip_ens into its zero and
        positive parts.
    
        Samples <= 0.0 are discarded.  A small uniform random number 
        in [-0.01, 0.01) is added to each remaining sample, because 
        the data was discretized to ~0.1 mm and the CDFs built later
        should not have lots of tied amounts.  Samples pushed to 
        <= 0.0 by that perturbation are discarded as well.
    
        Returns the fraction of the input samples that were discarded
        (fraction_zero), the surviving samples sorted in ascending
        order, and their number (nz).  nsamps is not used.
    
        """
    
        ntotal = len(precip_ens)
        
        # first censoring at 0.0 mm
        wet = precip_ens[~(precip_ens <= 0.0)]
        
        # perturb, then censor at 0.0 mm once more
        wet = wet + \
            np.random.uniform(low=-0.01,high=0.01,size=len(wet))
        wet = wet[~(wet <= 0.0)]
        
        nz = len(wet)
        precip_ens_nonzero = np.sort(wet)
        fraction_zero = float(ntotal - nz) / float(ntotal)
    
        return fraction_zero, precip_ens_nonzero, nz
    
    # =====================================================================
    
    # ---- inputs from command line

    #cmonth = sys.argv[1] # '01', '02' etc.
    #cend_hour = sys.argv[2] # 06, 12, 18, 00 -- end hour of 6-h period
    print ('cmonth, cend_hour = ', cmonth, cend_hour)
    imonth = int(cmonth) - 1
    nstride = 1 # do every point
    cdomain = 'conus'

    # ---- set directories, constants

    pflag = True # for print statements
    #master_directory = '/Volumes/Backup Plus/ccpa/'
    master_directory = '/Volumes/NBM/'+cdomain+'_panal/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_panal/CDF_spline/'
    ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
    cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']

    # ---- determine the overall number of daily precipitation 
    #      samples across all years for this month and the surrounding
    #      two months

    iearly = int(cmonths_early[imonth])-1
    ilate = int(cmonths_late[imonth])-1

    if imonth != 1:  # not Feb
        nsamps_mid = ndaysomo[imonth]*18
    else:
        nsamps_mid = 4*ndaysomo_leap[imonth] + 14*ndaysomo[imonth]
    
    if iearly != 1:  # not Feb    
        nsamps_early = ndaysomo[iearly]*20
    else:
        nsamps_early = 4*ndaysomo_leap[iearly] + 14*ndaysomo[iearly]
    if ilate != 1:  # not Feb    
        nsamps_late = ndaysomo[ilate]*20
    else:
        nsamps_late = 4*ndaysomo_leap[ilate] + 14*ndaysomo[ilate]
    nsamps = nsamps_mid + nsamps_early + nsamps_late
    print ('nsamps = ', nsamps)

    # ---- read in the previously generated netCDF file with precipitation
    #      for this month and lead time as well as the surrounding
    #      two months.  All dates for this month have
    #      been smushed into one leading index, dimension nsamps,
    #      since the date of the forecast within the month and 
    #      the member number is irrelevant for the distribution 
    #      fitting.
   

    ktr = 0
    for iyear in range(2002,2020):
    
        # --- loop over the month in question and the surrounding 2 months
    
        for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
            imo = int(cmo)-1
            if iyear%4 == 0:
                ndays = ndaysomo_leap[imo]
            else:
                ndays = ndaysomo[imo]
            cyear = str(iyear)    
            infile = master_directory + cyear + cmo + \
                '_ccpa_on_ndfd_grid_6hourly.nc'
            print (infile)
            nc = Dataset(infile)
            yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
            for iday in range(1,ndays+1):
                if iday < 10:
                    cday = '0'+str(iday)
                else:
                    cday = str(iday)
                iyyyymmddhh = int(str(iyear)+cmo+cday+cend_hour)
                idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
                precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
                if iyear == 2002 and iday == 1 and cmo == cmonth:
                    nlats_ndfd, nlons_ndfd = np.shape(precip_in)
                    precip_tseries = np.zeros((nsamps,nlats_ndfd,nlons_ndfd), \
                        dtype=np.float64)
                    missingv = -99.99*np.ones((nlats_ndfd, nlons_ndfd), \
                        dtype=np.float64)
                    lons = nc.variables['lons'][:,:]
                    lats = nc.variables['lats'][:,:]
                precip_in = np.where(precip_in < 500., precip_in, missingv)
                precip_tseries[ktr,:,:] = precip_in[:,:]
                ktr = ktr+1
            nc.close()


    # ---- loop over the grid points and estimate the spline coefficients 

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")

    spline_info = np.zeros((17,2,nlats_ndfd,nlons_ndfd), dtype=np.float64) 
    spline_info_inv = np.zeros((17,2,nlats_ndfd,nlons_ndfd), dtype=np.float64)

    usegamma = np.zeros((nlats_ndfd,nlons_ndfd), dtype=np.int32) 
        # flag for whether to use Gamma fit (1)
    indices_to_query = np.zeros((nlats_ndfd,nlons_ndfd,9), dtype=np.float16)
    Dnstat = 0.10*np.ones((nlats_ndfd, nlons_ndfd), dtype=np.float)
    fzero = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float)
    cdf_at_indices = np.asarray([ 0.1, 0.25, 0.33333, 0.5, 0.65, 0.8, 0.85, 0.9, 0.95])
        # these are interior knots where to calculate the cum hazard fn.

    print ('******** COMPUTING SPLINE COEFFICIENTS (wetter) or GAMMA PARAMETERS (dry) *********')
    #for jy in range(0,nlats_ndfd,nstride):
    for jy in range(498,499):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        if jy%10 == 0: print ('   ***** begin time, current time, jy, nlats_ndfd, lat = ',\
            begin_time, current_time, jy, nlats_ndfd, lats[jy,0])
            
        #for ix in range(0,nlons_ndfd, nstride):
        for ix in range (521,522):
        
            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip.  to make sure 
            #      that we don't have either negative values or lots of the 
            #      same tiny values, subtractoff teeny_precip
        
            if pflag == True: print ('******* ',jy,ix)
            precip_ens_1d = precip_tseries[:,jy,ix]
            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps) # make sure that samples >= 0.0
            precip_ens_1d = precip_ens_1d - teeny_precip[:]
            if pflag == True: print ('min, max precip_ens_1d = ',\
                np.min(precip_ens_1d), np.max(precip_ens_1d))
            # take this grid point's sample, calc fraction_zero, and return
            # the number of nonzero (nz) samples and their sorted values
            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps, precip_ens_1d) # return sorted
            if pflag == True and nz > 0 : print ('   precip_ens_nonzero[0:-1] = ',\
                precip_ens_nonzero[0:-1])
            if pflag == True: print ('   nz = ',nz)
            if nz > 40:
            
                # ---- spline fit the CDF to the precipitation values via 
                #      Michael Scheuerer's hazard function (see Fig 3 in
                #      https://doi.org/10.1175/MWR-D-20-0096.1. )
            
                fzero[jy,ix] = fraction_zero 
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                # ---- set where spline knots are.
                query_these_indices = [ nz//10, nz//4, nz//3, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20]
                indices_to_query[jy,ix,:] = query_these_indices[:]
                empirical_precipvals = precip_ens_nonzero[query_these_indices]
                hazard_function_empirical = -np.log(1.0-empirical_cdf)  
                # ---- compute spline coefficients/  
                spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)  
                # ---- compute the fitted CDF 
                spline_hazard = splev(precip_ens_nonzero, spltemp)
                spline_cdf = 1.0 - np.exp(-spline_hazard)
            
                # ---- save spline information to numpy array, 
        
                spline_info[:,0,jy,ix] = spltemp[0]
                spline_info[:,1,jy,ix] = spltemp[1]
            
                # ---- in the subsequent quantile mapping, we will want the
                #      analyzed precipitation amount given the quantile.
                #      Accordingly, let's also reverse the data in the spline
                #      and get the spline fits of precipitation amount 
                #      (y) to the cdf (x).
            
                hazard_function_at_indices = -np.log(1.0-cdf_at_indices)
                spltemp_inv = splrep(hazard_function_empirical, precip_ens_nonzero, \
                    xb=0., task=-1, t = cdf_at_indices)
                spline_info_inv[:,0,jy,ix] = spltemp_inv[0]
                spline_info_inv[:,1,jy,ix] = spltemp_inv[1]

                # --- evaluate Dn statistic, goodness of fit.
            
                diff = np.abs(empirical_cdf - spline_cdf)
                Dnstat[jy,ix] = np.max(diff) 
                usegamma[jy,ix] = 0
            
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
            
                spline_info_inv[:,0,jy,ix] = -99.99
                spline_info_inv[:,1,jy,ix] = -99.99
                Dnstat[jy,ix] = -99.99
                fzero[jy,ix] = -99.99
                indices_to_query[jy,ix,:] = -99 
                usegamma[jy,ix] = 0
            
            else:
            
                # ---- too few samples to use splines; fit a single-parameter Gamma 
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.
            
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                fzero[jy,ix] = fraction_zero 
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag == True: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag == True: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                indices_to_query[jy,ix,:] = -1 # flag for using Gamma

                spline_info_inv[:,0,jy,ix] = alpha_hat  # smoosh into the spline array
                spline_info_inv[:,1,jy,ix] = beta_hat # smoosh into the spline array
                usegamma[jy,ix] = 1
            
                # --- evaluate Dn statistic, goodness of fit.
            
                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
                diff = np.abs(empirical_cdf - fitted_CDF)
                Dnstat[jy,ix] = np.max(diff) 
            
           
           
    sys.exit()
            
    # --- save to cPickle file

    #outfile = master_directory_out + cmonth+'_'+cdomain+\
    #    '_CCPA_spline_info_h' + cend_hour + '.cPick'
    #print ('writing to ', outfile)
    #ouf = open(outfile, 'wb')
    #cPickle.dump(spline_info, ouf)
    #cPickle.dump(spline_info_inv, ouf)
    #cPickle.dump(fzero, ouf)
    #cPickle.dump(usegamma, ouf)
    #cPickle.dump(indices_to_query, ouf)
    #ouf.close()

    outfile = master_directory_out + cmonth+'_'+cdomain+\
        '_CCPA_Dnstat_h' + cend_hour + '.cPick'
    print ('writing to ', outfile)
    ouf = open(outfile, 'wb')
    cPickle.dump(Dnstat, ouf)
    cPickle.dump(lons, ouf)
    cPickle.dump(lats, ouf)
    ouf.close()

    # ---- save to netCDF file

    outfile = master_directory_out + cmonth+'_'+cdomain+\
        '_CCPA_spline_info_h' + cend_hour + 'UTC.nc' 
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    spline_info_inv_out = ncout.createVariable('spline_info_inv',\
        'f4',('xspd','x2','yf','xf',),
        zlib=True,least_significant_digit=6)
    spline_info_inv_out.units = "n/a"
    spline_info_inv_out.long_name = \
        "Information for computing precipitation from"+\
        "spline inverse (or Gamma CDF for dry points)"
    spline_info_inv_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF structures.

    xvf[:] = range(nlons_ndfd)
    yvf[:] = range(nlats_ndfd)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    lonsa[:] = lons[:,:]
    latsa[:] = lats[:,:]
    spline_info_inv_out[:] = spline_info_inv[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma[:,:]

    ncout.close()


    istat = 0
    return istat                                     ._CDF_fitting_ccpa_precip_spline_flexiknot.py                                                       000755  000765  000024  00000000473 14073573306 021673  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  	     ;                                      ATTR      ;     #                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0     com.macromates.selectionRange      6     com.macromates.visibleIndex  `    xH     ( '59' )255:7313139                                                                                                                                                                                                     CDF_fitting_ccpa_precip_spline_flexiknot.py                                                         000755  000765  000024  00000044625 14073573306 021465  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour):

    """
    CDF_fitting_ccpa_precip_spline_flexiknot.py cmonth cend_hour
    
    where cmonth = '01' to '12' and cend_hour is '00','06','12', or '18'
    
    this python script is designed to spline fit an empirical CDF of 6-h accumulated
    precipitation. The script is tailored to merged CCPA/MSWEP precipitation  
    analyses over the National Digital Forecast Database CONUS domain for the National 
    Blend of Models.
    
    the "flexiknot" version here makes a modification on the number of knots used
    in the spline fit, which is related to the number of samples with positive
    precipitation; more samples, more knots.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, July 2021

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import splrep, splev
 
    # =====================================================================

    def fraczero_possamps(nsamps, precip_samples):

        """
        Separate one grid point's precipitation sample into dry and wet parts.

        nsamps is accepted for interface compatibility but is not used; the
        sample count is taken from len(precip_samples).

        Steps:
          1. discard every value <= 0.0;
          2. add uniform noise in [-0.0001, 0.0001) to the remaining values
             so that amounts discretized in the input are no longer tied;
          3. discard any value the noise pushed to <= 0.0 (censor at 0.0 mm);
          4. sort what is left in ascending order.

        Returns (fraction_zero, precip_samples_nonzero, nz), where nz is the
        number of surviving positive values and fraction_zero is
        (len(precip_samples) - nz) / len(precip_samples).
        """

        # ---- keep the strictly positive amounts and jitter them

        wet = precip_samples[~(precip_samples <= 0.0)]
        jitter = np.random.uniform(low=-0.0001,high=0.0001,size=len(wet))
        wet = wet + jitter

        # ---- censor at 0.0 mm again, since the jitter may have produced
        #      non-positive values

        wet = wet[~(wet <= 0.0)]
        nz = len(wet)   # number non-zero

        # ---- anything that did not survive is counted as "zero"

        ntotal = len(precip_samples)
        fraction_zero = float(ntotal - nz) / float(ntotal)

        return fraction_zero, np.sort(wet), nz
    
    # =====================================================================
    
    def define_knot_locations(nz, precip_samples_nonzero):

        """
        Choose the number of interior spline knots and where they fall in
        the sorted sample of positive precipitation amounts.

        nz: number of positive precipitation samples (must be > 0; the
            caller in this file only passes nz > 50).
        precip_samples_nonzero: accepted for interface compatibility; the
            knot positions depend only on nz, so it is not used.

        Returns (nknots, query_these_indices, cdf_at_indices):
          nknots              - number of interior knots, an odd value
                                from 3 to 9;
          query_these_indices - list of integer positions into the sorted
                                sample, one per knot;
          cdf_at_indices      - list of empirical CDF values at those
                                positions, (iloc + 0.5)/nz.

        We want fewer knots for small samples, and we want the knots to
        emphasize the upper quantiles of the distribution, as that's where
        we care most about an accurate fit.
        """

        # ---- parameters of the beta distribution whose quantiles place
        #      the knots.  With rp > rq the quantiles crowd toward 1.0,
        #      i.e., toward the upper end of the sorted sample.

        rp = 3.5
        rq = 1.0

        # ---- one knot per 30 positive samples, bounded to [3, 9].  An
        #      even count is bumped up by one so the count is always odd
        #      (8 becomes 9, so the upper bound of 9 still holds).

        nknots = max(min(9, nz//30), 3)
        if nknots % 2 == 0: nknots = nknots+1

        query_these_indices = []
        cdf_at_indices = []
        for iknot in range(1, nknots+1):

            # ---- equally spaced probabilities, mapped through the beta
            #      quantile function to a fractional position in (0,1)

            rknot = float(iknot)/(nknots+1)
            xloc = stats.beta.ppf(rknot, rp, rq)

            # ---- convert to an integer index and to the empirical CDF
            #      value used for that index elsewhere in this file

            iloc = int(nz*xloc)
            query_these_indices.append(iloc)
            cdf_at_indices.append((1./(2.*nz)) + float(iloc)/float(nz))

        return nknots, query_these_indices, cdf_at_indices

    # =====================================================================

    # ---- set directories, constants
    
    print ('cmonth, cend_hour = ', cmonth, cend_hour)
    imonth = int(cmonth) - 1
    nstride = 1 # do every point
    cdomain = 'conus'
    pflag = False # for print statements
    #master_directory = '/Volumes/Backup Plus/ccpa/'
    master_directory = '/Volumes/NBM/'+cdomain+'_panal/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_panal/CDF_spline/'
    ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    # leap-year table: February has 29 days
    ndaysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
    cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']
    
    cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        
    yearstart = 2002 # CCPA only available starting 2002
    yearend = 2020 # companion reforecasts end at end of 2019

    # ---- determine the overall number of daily precipitation 
    #      samples across all years for this month and the surrounding
    #      two months

    iearly = int(cmonths_early[imonth])-1
    ilate = int(cmonths_late[imonth])-1

    # ---- count the days year by year with the same leap-year rule
    #      (iyear%4 == 0) that is used when the data are read, so that
    #      nsamps equals the number of records actually loaded.  Any
    #      surplus rows in the (nsamps, ny, nx) time series would stay at
    #      0.0 and be counted as dry samples, biasing fraction_zero high.

    nsamps_mid = 0
    nsamps_early = 0
    nsamps_late = 0
    for iyr in range(yearstart, yearend):
        if iyr%4 == 0:
            ndays_by_month = ndaysomo_leap
        else:
            ndays_by_month = ndaysomo
        nsamps_mid = nsamps_mid + ndays_by_month[imonth]
        nsamps_early = nsamps_early + ndays_by_month[iearly]
        nsamps_late = nsamps_late + ndays_by_month[ilate]
    nsamps = nsamps_mid + nsamps_early + nsamps_late
    print ('nsamps = ', nsamps)

    # ---- read in the previously generated netCDF file with precipitation
    #      for this month and lead time as well as the surrounding
    #      two months.  All dates for this month have
    #      been smushed into one leading index, dimension nsamps,
    #      since the date of the forecast within the month and 
    #      the member number is irrelevant for the distribution 
    #      fitting.
   
    # ---- ktr counts the records copied into precip_tseries so far; the
    #      output arrays are allocated when the first record is read,
    #      because the grid dimensions are only known at that point.

    ktr = 0
    for iyear in range(yearstart, yearend):
    
        # --- loop over the month in question and the surrounding 2 months
    
        for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
            imo = int(cmo)-1
            if iyear%4 == 0:
                ndays = ndaysomo_leap[imo]
            else:
                ndays = ndaysomo[imo]
            cyear = str(iyear)    
            infile = master_directory + cyear + cmo + \
                '_ccpa_on_ndfd_grid_6hourly.nc'
            print (infile)
            nc = Dataset(infile)
            try:
                yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
                for iday in range(1,ndays+1):
                    cday = str(iday).zfill(2)
                    iyyyymmddhh = int(cyear+cmo+cday+cend_hour)
                    idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
                    
                    # ---- exactly one record must match; otherwise the
                    #      array shapes below would not line up.
                    
                    if len(idx) != 1:
                        raise ValueError('expected exactly one record for ' + \
                            str(iyyyymmddhh) + ' in ' + infile + \
                            ', found ' + str(len(idx)))
                    precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
                    if ktr == 0:
                        nlats_ndfd, nlons_ndfd = np.shape(precip_in)
                        precip_tseries = np.zeros((nsamps,nlats_ndfd,nlons_ndfd), \
                            dtype=np.float64)
                        missingv = -99.99*np.ones((nlats_ndfd, nlons_ndfd), \
                            dtype=np.float64)
                        lons = nc.variables['lons'][:,:]
                        lats = nc.variables['lats'][:,:]
                        
                    # ---- values that are not below 500. are replaced
                    #      by the -99.99 missing flag
                    
                    precip_in = np.where(precip_in < 500., precip_in, missingv)
                    precip_tseries[ktr,:,:] = precip_in[:,:]
                    ktr = ktr+1
            finally:
                nc.close()  # release the file even if a read fails

    # ---- loop over the grid points and estimate the inverse spline coefficients 

    # ---- remember the wall-clock start so progress messages can show it

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")

    # ---- per-grid-point outputs.  The last two axes of the spline arrays
    #      are (2, 17): row 0 and row 1 are filled in the fitting loop
    #      with either the spline knots/coefficients or Gamma alpha/beta.

    spline_info = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    spline_info_inv = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    number_knots = np.zeros((nlats_ndfd,nlons_ndfd), dtype=np.int32)
    usegamma = np.zeros((nlats_ndfd,nlons_ndfd), dtype=np.int32) 
        # flag for whether to use Gamma fit (1) or spline (0) or missing data (-1)
    # np.float64 rather than the np.float alias, which newer NumPy
    # releases no longer provide
    fzero = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64)

    print ('******** COMPUTING SPLINE COEFFICIENTS (wetter) or GAMMA PARAMETERS (drier) *********')

    # ---- visit every nstride-th grid point and, depending on the number
    #      of positive precipitation samples nz at that point, either
    #      (a) nz > 50: fit an inverse spline (hazard function -> amount),
    #      (b) nz < 10: flag the point as missing, or
    #      (c) otherwise: fit a Gamma distribution (alpha, beta).

    for jy in range(0, nlats_ndfd, nstride):
        #for jy in range(528,529):

        # ---- progress message every 10th row

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        if jy%10 == 0: print ('   ***** begin time, current time, jy, nlats_ndfd, lat = ',\
            begin_time, current_time, jy, nlats_ndfd, lats[jy,0])
            
        for ix in range(0,nlons_ndfd, nstride):
        #for ix in range (465,466):
        
            # ---- NOTE(review): an earlier comment here described
            #      subtracting a "teeny_precip" offset to deal with grib
            #      round-off producing slightly negative values.  No such
            #      subtraction happens in this loop; non-positive values
            #      are dropped inside fraczero_possamps instead.
        
            if pflag == True: print ('******* ',jy,ix)
            precip_samples_1d = precip_tseries[:,jy,ix]
            
            # ---- take this grid point's sample, calc fraction_zero, and return
            #      the number of nonzero (nz) samples and their sorted values
            
            fraction_zero, precip_samples_nonzero, nz = \
                fraczero_possamps(nsamps, precip_samples_1d) # return sorted
            if pflag == True and nz > 0 : print ('   precip_samples_nonzero[0:-1] = ',\
                precip_samples_nonzero[0:-1])
            if pflag == True: print ('   nz = ',nz)
            if nz > 50:
            
                # ---- spline fit the CDF to the precipitation values via 
                #      Michael Scheuerer's hazard function (see Fig 3 in
                #      https://doi.org/10.1175/MWR-D-20-0096.1. )
            
                usegamma[jy,ix] = 0 # a flag to use spline inverse
                fzero[jy,ix] = fraction_zero 
                # empirical CDF of the sorted positive samples, (i + 0.5)/nz
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                
                # ---- set the indices in the sorted precipitation sample 
                #      where spline knots are, the CDF at these values,
                #      the number of knots.
                    
                nknots, query_these_indices, cdf_at_indices = \
                    define_knot_locations(nz, precip_samples_nonzero) 
                
                # ---- determine the precipitation values at the indices of knots
                #      (used only in the diagnostic print below).
                #      Also transform the empirical CDF to a hazard
                #      function, -ln(1 - CDF).
                
                empirical_precipvals = precip_samples_nonzero[query_these_indices]
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)
                
                # ---- in the subsequent quantile mapping, we will want the
                #      analyzed precipitation amount given the quantile.
                #      Accordingly, let's reverse the data in the spline
                #      and get the spline fits of precipitation amount 
                #      (y) to the hazard function of the cdf (x).  It seems
                #      the weight w is important in the quality of the fit.
                #      Smaller precipitation amounts have smaller errors in
                #      general, so the weight 1/sqrt(amount) gives them
                #      more influence.
            
                hazard_function_at_indices = -np.log(1.0-np.asarray(cdf_at_indices))
                if pflag == True: 
                    print ('   query_these_indices = ',query_these_indices)
                    print ('   hazard_function_at_indices = ',hazard_function_at_indices)
                    print ('   empirical_precipvals = ',empirical_precipvals)
                    print ('   hazard_function_empirical = ',hazard_function_empirical)
                w = 1./precip_samples_nonzero**0.5
                # task=-1: least-squares cubic (k=3) spline with the
                # interior knots supplied in t
                spltemp_inv = splrep(hazard_function_empirical, precip_samples_nonzero, \
                    xb=0., task=-1, t = hazard_function_at_indices, k=3, w = w)    
                lspline = len(spltemp_inv[0])  
                
                # --- store the knot vector (spltemp_inv[0]) and the spline
                #     coefficients (spltemp_inv[1]) in the arrays to be
                #     dumped to netCDF file.  number_knots records the
                #     length of the knot vector returned by splrep, not
                #     nknots.
                    
                spline_info_inv[jy,ix,0,0:lspline] = spltemp_inv[0]
                spline_info_inv[jy,ix,1,0:lspline] = spltemp_inv[1]
                number_knots[jy,ix] = lspline     
                
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
                
                usegamma[jy,ix] = -1   
                number_knots[jy,ix] = 0         
                spline_info_inv[jy,ix,0,:] = -99.99
                spline_info_inv[jy,ix,1,:] = -99.99
                fzero[jy,ix] = -99.99

            else:
            
                # ---- too few samples to use splines (10 <= nz <= 50); fit a
                #      Gamma distribution, estimating both the shape (alpha)
                #      and scale (beta) parameters with the Thom (1958)
                #      estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.

                usegamma[jy,ix] = 1  
                number_knots[jy,ix] = 0          
                # computed for parity with the spline branch; not used
                # further in this branch
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                fzero[jy,ix] = fraction_zero 
                pmean = np.mean(precip_samples_nonzero)
                lnxbar = np.log(pmean)
                if pflag == True: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_samples_nonzero))
                if pflag == True: print ('meanlnxi = ', meanlnxi)
                # D = ln(mean) - mean(ln); alpha and beta follow from D
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_inv[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_inv[jy,ix,1,:] = beta_hat # smoosh into the spline array   

    # ---- save to netCDF file.  The dataset is opened in a with-block so
    #      it is closed even if one of the writes fails.
    
    outfile = master_directory_out + cmonths[imonth]+'_'+cdomain+\
        '_CCPA_spline_info_h' + cend_hour + 'UTC.nc' 
    print ('writing to ',outfile)
    with Dataset(outfile,'w',format='NETCDF4_CLASSIC') as ncout:

        # ---- dimensions and their index variables

        ncout.createDimension('xf',nlons_ndfd)
        xvf = ncout.createVariable('xf','i4',('xf',))
        xvf.long_name = "eastward grid point number on NDFD grid"
        xvf.units = "n/a"

        ncout.createDimension('yf',nlats_ndfd)
        yvf = ncout.createVariable('yf','i4',('yf',))
        yvf.long_name = "northward grid point number on NDFD grid"
        yvf.units = "n/a"

        ncout.createDimension('xspd',17)
        xspdf = ncout.createVariable('xspd','i4',('xspd',))
        xspdf.long_name = "index for spline dimension"
        xspdf.units = "n/a"

        ncout.createDimension('x2',2)
        x2f = ncout.createVariable('x2','i4',('x2',))
        x2f.long_name = "2nd index for spline dimension"
        x2f.units = "n/a"

        # ---- coordinates

        lonsa = ncout.createVariable('lons','f4',('yf','xf',))
        lonsa.long_name = "longitude"
        lonsa.units = "degrees_east"

        latsa = ncout.createVariable('lats','f4',('yf','xf',))
        latsa.long_name = "latitude"
        latsa.units = "degrees_north"

        # ---- fitted parameters.  The sample-size thresholds quoted in the
        #      description match the fitting loop: spline if more than 50
        #      positive samples, Gamma for 10 to 50, nothing below 10.
        #      NOTE(review): the missing_value attributes below are
        #      -9999.99, whereas points flagged as missing in the fitting
        #      loop are filled with -99.99; left unchanged here because
        #      readers may depend on the current attribute - confirm.

        spline_info_inv_out = ncout.createVariable('spline_info_inv',\
            'f4',('yf','xf','x2','xspd'),
            zlib=True,least_significant_digit=6)
        spline_info_inv_out.units = "n/a"
        spline_info_inv_out.long_name = \
            "Information for computing quantile-mapped precipitation from "+\
            "spline inverse (or Gamma CDF for dry points).   When given a "+\
            "forecast quantile, this will predict the analyzed precipitation amt. "+\
            "x2=0 is for knots, x2=1 for spline coefficients.  Splines used "+\
            "only if there are sufficient samples, > 50.   If the sample size is "+\
            "between 10 and 50, fit a Gamma distribution instead, and insert the "+\
            "alpha and beta parameters into this variable, alpha in x2=0, "+\
            "beta in x2=1.  If less than 10 samples, don't try to do anything."
        spline_info_inv_out.missing_value = \
            np.array(-9999.99,dtype=np.float32)

        # NOTE(review): spline_info is allocated but never filled in the
        # visible part of this function, so this variable is written as
        # all zeros - confirm whether it is still needed.
        spline_info_out = ncout.createVariable('spline_info',\
            'f4',('yf','xf','x2','xspd'),
            zlib=True,least_significant_digit=6)
        spline_info_out.units = "n/a"
        spline_info_out.long_name = \
            "Information for computing precipitation from spline "+\
            "Diagnostic, and only valid at points that are reasonably moist."
        spline_info_out.missing_value = \
            np.array(-9999.99,dtype=np.float32)

        fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
            zlib=True,least_significant_digit=6)
        fzero_out.units = "n/a"
        fzero_out.long_name = "fraction_zero"
        fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

        usegamma_out = ncout.createVariable('usegamma',\
            'i4',('yf','xf',), zlib=True)
        usegamma_out.units = "n/a"
        usegamma_out.long_name = "1 if fit CDF via Gamma distribution, "+\
            "0 if fit via spline, -1 if too few samples to fit (missing)"
        usegamma_out.missing_value = np.array(-99,dtype=np.int32)

        numberknots_out = ncout.createVariable('number_knots',\
            'i4',('yf','xf',), zlib=True)
        numberknots_out.units = "n/a"
        numberknots_out.long_name = "number of knots when using spline"
        numberknots_out.missing_value = np.array(-99,dtype=np.int32)

        # ---- metadata

        ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
        ncout.history = "from CDF fitting code by Tom Hamill, PSL"
        ncout.institution =  "psl.noaa.gov"
        ncout.platform = "n/a"
        ncout.references = "n/a"

        # ---- copy the outputs to netCDF data structures.

        xvf[:] = range(nlons_ndfd)
        yvf[:] = range(nlats_ndfd)
        xspdf[:] = range(17)
        x2f[:] = range(2)
        lonsa[:] = lons[:,:]
        latsa[:] = lats[:,:]
        spline_info_inv_out[:] = spline_info_inv[:,:,:,:]
        spline_info_out[:] = spline_info[:,:,:,:]
        fzero_out[:] = fzero[:,:]
        usegamma_out[:] = usegamma[:,:]
        numberknots_out[:] = number_knots[:,:]

    # ---- diagnostic print for one fixed grid point; skipped when the
    #      grid is too small to contain it, so a finished run does not
    #      end in an IndexError.

    if nlats_ndfd > 528 and nlons_ndfd > 465:
        print ('spltemp[0] = ',spline_info_inv[528,465,0,:])
        print ('spltemp[1] = ',spline_info_inv[528,465,1,:])
                        

    istat = 0
    return istat                                                                                                           ._CDF_fitting_ccpa_precip_spline_v2.py                                                              000775  000765  000024  00000000476 14070406543 020217  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       >                                      ATTR      >     &                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0   
  com.macromates.selectionRange      :     com.macromates.visibleIndex  `    xH     ( '56' )68:5-68:727226                                                                                                                                                                                                  CDF_fitting_ccpa_precip_spline_v2.py                                                                000775  000765  000024  00000041721 14070406543 020000  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_fitting_ccpa_precip_spline_v2(cmonth, cend_hour):

    """
    CDF_fitting_ccpa_precip_spline.py cmonth cend_hour
    
    where cmonth = '01' to '12' and cend_hour is '00','06','12', or '18'
    
    this python script is designed to spline fit an empirical CDF of 6-h accumulated
    precipitation. The script is tailored to merged CCPA/MSWEP precipitation  
    analyses over the National Digital Forecast Database CONUS domain for the National 
    Blend of Models

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Jan 2021

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    import scipy.stats as stats
    from scipy.interpolate import splrep, splev
 
    # =====================================================================

    def fraczero_possamps(nsamps, precip_samples):

        """
        Separate one grid point's precipitation sample into dry and wet parts.

        nsamps is accepted for interface compatibility but is not used; the
        sample count is taken from len(precip_samples).

        Steps:
          1. discard every value <= 0.0;
          2. add uniform noise in [-0.01, 0.01) to the remaining values so
             that amounts discretized in the input are no longer tied;
          3. discard any value the noise pushed to <= 0.0 (censor at 0.0 mm);
          4. sort what is left in ascending order.

        Returns (fraction_zero, precip_samples_nonzero, nz), where nz is the
        number of surviving positive values and fraction_zero is
        (len(precip_samples) - nz) / len(precip_samples).
        """

        # ---- keep the strictly positive amounts and jitter them

        wet = precip_samples[~(precip_samples <= 0.0)]
        jitter = np.random.uniform(low=-0.01,high=0.01,size=len(wet))
        wet = wet + jitter

        # ---- censor at 0.0 mm again, since the jitter may have produced
        #      non-positive values

        wet = wet[~(wet <= 0.0)]
        nz = len(wet)   # number non-zero

        # ---- anything that did not survive is counted as "zero"

        ntotal = len(precip_samples)
        fraction_zero = float(ntotal - nz) / float(ntotal)

        return fraction_zero, np.sort(wet), nz
    
    # =====================================================================

    # ---- set directories, constants
    
    print ('cmonth, cend_hour = ', cmonth, cend_hour)
    imonth = int(cmonth) - 1
    nstride = 1 # do every point
    cdomain = 'conus'
    pflag = False # True # for print statements
    #master_directory = '/Volumes/Backup Plus/ccpa/'
    master_directory = '/Volumes/NBM/'+cdomain+'_panal/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_panal/CDF_spline/'
    ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    # leap-year table: February has 29 days
    ndaysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
    cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']
    
    cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        
    yearstart = 2002 # CCPA only available starting 2002
    yearend = 2020 # companion reforecasts end at end of 2019

    # ---- determine the overall number of daily precipitation 
    #      samples across all years for this month and the surrounding
    #      two months

    iearly = int(cmonths_early[imonth])-1
    ilate = int(cmonths_late[imonth])-1

    # ---- count the days year by year with the same leap-year rule
    #      (iyear%4 == 0) that is used when the data are read, so that
    #      nsamps equals the number of records actually loaded.  Any
    #      surplus rows in the (nsamps, ny, nx) time series would stay at
    #      0.0 and be counted as dry samples, biasing fraction_zero high.

    nsamps_mid = 0
    nsamps_early = 0
    nsamps_late = 0
    for iyr in range(yearstart, yearend):
        if iyr%4 == 0:
            ndays_by_month = ndaysomo_leap
        else:
            ndays_by_month = ndaysomo
        nsamps_mid = nsamps_mid + ndays_by_month[imonth]
        nsamps_early = nsamps_early + ndays_by_month[iearly]
        nsamps_late = nsamps_late + ndays_by_month[ilate]
    nsamps = nsamps_mid + nsamps_early + nsamps_late
    print ('nsamps = ', nsamps)

    # ---- read in the previously generated netCDF file with precipitation
    #      for this month and lead time as well as the surrounding
    #      two months.  All dates for this month have
    #      been smushed into one leading index, dimension nsamps,
    #      since the date of the forecast within the month and 
    #      the member number is irrelevant for the distribution 
    #      fitting.
   
    ktr = 0
    for iyear in range(yearstart, yearend):
    
        # --- loop over the month in question and the surrounding 2 months
    
        for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
            imo = int(cmo)-1
            if iyear%4 == 0:
                ndays = ndaysomo_leap[imo]
            else:
                ndays = ndaysomo[imo]
            cyear = str(iyear)    
            infile = master_directory + cyear + cmo + \
                '_ccpa_on_ndfd_grid_6hourly.nc'
            print (infile)
            nc = Dataset(infile)
            yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
            for iday in range(1,ndays+1):
                if iday < 10:
                    cday = '0'+str(iday)
                else:
                    cday = str(iday)
                iyyyymmddhh = int(str(iyear)+cmo+cday+cend_hour)
                idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
                precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
                if iyear == 2002 and iday == 1 and cmo == cmonth:
                    nlats_ndfd, nlons_ndfd = np.shape(precip_in)
                    precip_tseries = np.zeros((nsamps,nlats_ndfd,nlons_ndfd), \
                        dtype=np.float64)
                    missingv = -99.99*np.ones((nlats_ndfd, nlons_ndfd), \
                        dtype=np.float64)
                    lons = nc.variables['lons'][:,:]
                    lats = nc.variables['lats'][:,:]
                precip_in = np.where(precip_in < 500., precip_in, missingv)
                precip_tseries[ktr,:,:] = precip_in[:,:]
                ktr = ktr+1
            nc.close()


    #precip_samples_1d = precip_tseries[:,784,930]
    #fraction_zero, precip_samples_nonzero, nz = \
    #    fraczero_possamps(nsamps, precip_samples_1d)
    #print ('precip_samples_nonzero = ', precip_samples_nonzero[0:-1:10])

    # ---- loop over the grid points and estimate the inverse spline coefficients 

    # ---- remember the wall-clock start so progress messages can show it

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")

    # ---- per-grid-point outputs; the last two axes of the spline arrays
    #      are (2, 17).

    spline_info = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    spline_info_inv = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    usegamma = np.zeros((nlats_ndfd,nlons_ndfd), dtype=np.int32) 
        # flag for whether to use Gamma fit (1) or spline (0) or missing data (-1)
    # np.float64 rather than the np.float alias, which newer NumPy
    # releases no longer provide
    fzero = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64)
    #cdf_at_indices = np.asarray([ 0.1, 0.25, 0.33333, 0.5, 0.65, 0.8, 0.85, 0.93, 0.98])
    #    # these are interior knots where to calculate the cum hazard fn.
    cdf_at_indices = np.asarray([ 0.1, 0.25, 0.5, 0.6, 0.8, 0.85, 0.9, 0.96, 0.98])
        # these are interior knots where to calculate the cum hazard fn.

    print ('******** COMPUTING SPLINE COEFFICIENTS (wetter) or GAMMA PARAMETERS (dry) *********')

    for jy in range(0, nlats_ndfd, nstride):
    #for jy in range(784,785):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        if jy%10 == 0: print ('   ***** begin time, current time, jy, nlats_ndfd, lat = ',\
            begin_time, current_time, jy, nlats_ndfd, lats[jy,0])
            
        for ix in range(0,nlons_ndfd, nstride):
        #for ix in range (930, 931):
        
            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip.  to make sure 
            #      that we don't have either negative values or lots of the 
            #      same tiny values, subtractoff teeny_precip
        
            if jy == 784 and ix == 930:
                pflag = True
            else:
                pflag = False
            if pflag == True: print ('******* ',jy,ix)
            precip_samples_1d = precip_tseries[:,jy,ix]
            
            # take this grid point's sample, calc fraction_zero, and return
            # the number of nonzero (nz) samples and their sorted values
            fraction_zero, precip_samples_nonzero, nz = \
                fraczero_possamps(nsamps, precip_samples_1d) # return sorted
            if pflag == True and nz > 0 : print ('   precip_samples_nonzero[0:-1] = ',\
                precip_samples_nonzero[0:-1])
            if pflag == True: print ('   nz = ',nz)
            if nz > 50:
            
                # ---- spline fit the CDF to the precipitation values via 
                #      Michael Scheuerer's hazard function (see Fig 3 in
                #      https://doi.org/10.1175/MWR-D-20-0096.1. )
            
                usegamma[jy,ix] = 0 # a flag to use spline inverse
                fzero[jy,ix] = fraction_zero 
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                
                # ---- set where spline knots are.
                
                query_these_indices = [ nz//10, nz//4, nz//3, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20]   
                
                cdf_at_indices = np.asarray(\
                    [ 0.1, 0.25, 0.5, 0.6, 0.8, 0.85, 0.9, 0.96, 0.98])
                    
                #query_these_indices = [ nz//10, nz//4, nz//2, (3*nz)//5, \
                #    (4*nz)//5, (17*nz)//20, (9*nz)//10, (24*nz)//25, (49*nz)//50]
                    
                    
                if pflag == True: \
                    print ('   query_these_indices = ',query_these_indices)
                empirical_precipvals = precip_samples_nonzero[query_these_indices]
                if pflag == True: \
                    print ('   empirical_precipvals = ',empirical_precipvals)
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)  
                
                # ---- in the subsequent quantile mapping, we will want the
                #      analyzed precipitation amount given the quantile.
                #      Accordingly, let's reverse the data in the spline
                #      and get the spline fits of precipitation amount 
                #      (y) to the cdf (x).
            
                hazard_function_at_indices = -np.log(1.0-cdf_at_indices)
                if pflag == True: print \
                    ('   hazard_function_empirical = ',hazard_function_empirical)
                #spltemp_inv = splrep(hazard_function_empirical, precip_samples_nonzero, \
                #    xb=0., task=-1, t = cdf_at_indices)
                spltemp_inv = splrep(hazard_function_empirical, precip_samples_nonzero, \
                    xb=0., task=-1, t = hazard_function_at_indices)    
                    
                spline_info_inv[jy,ix,0,:] = spltemp_inv[0]
                if pflag == True: print \
                    ('   spline_info_inv[jy,ix,0,:] = ',spline_info_inv[jy,ix,0,:])
                spline_info_inv[jy,ix,1,:] = spltemp_inv[1]
                if pflag == True: print \
                    ('   spline_info_inv[jy,ix,1,:] = ',spline_info_inv[jy,ix,1,:])
                
                # ---- spline fit to anal CDF with the focus on knots at higher quantiles.
                #      these data are saved not for quantile mapping but for diagnostics.

                spltemp = splrep(precip_samples_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)   
                spline_info[jy,ix,0,:] = spltemp[0]
                spline_info[jy,ix,1,:] = spltemp[1]
            
                fitted_precip = splev(hazard_function_empirical, spltemp)
                if pflag == True: print ('fitted precip = ', fitted_precip)
                
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
                
                usegamma[jy,ix] = -1            
                spline_info_inv[jy,ix,0,:] = -99.99
                spline_info_inv[jy,ix,1,:] = -99.99
                fzero[jy,ix] = -99.99

            else:
            
                # ---- too few samples to use splines; fit a single-parameter Gamma 
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.

                usegamma[jy,ix] = 1            
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                fzero[jy,ix] = fraction_zero 
                pmean = np.mean(precip_samples_nonzero)
                lnxbar = np.log(pmean)
                if pflag == True: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_samples_nonzero))
                if pflag == True: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_inv[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_inv[jy,ix,1,:] = beta_hat # smoosh into the spline array   

    # ---- save to netCDF file
    

    outfile = master_directory_out + cmonths[imonth]+'_'+cdomain+\
        '_CCPA_spline_info_h' + cend_hour + 'UTC.nc' 
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    spline_info_inv_out = ncout.createVariable('spline_info_inv',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_inv_out.units = "n/a"
    spline_info_inv_out.long_name = \
        "Information for computing quantile-mapped precipitation from "+\
        "spline inverse (or Gamma CDF for dry points).   When given a "+\
        "forecast quantile, this will predict the analyzed precipitation amt. "+\
        "x2=0 is for knots, x2=1 for spline coefficients.  Splines used "+\
        "only if there are sufficient samples, > 40.   If the sample size is "+\
        "between 10 and 40, fit a Gamma distribution instead, and insert the "+\
        "alpha and beta parameters into this variable, alpha in x2=0, "+\
        "beta in x2=1.  If less than 10 samples, don't try to do anything."
    spline_info_inv_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)
          
    spline_info_out = ncout.createVariable('spline_info',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_out.units = "n/a"
    spline_info_out.long_name = \
        "Information for computing precipitation from spline "+\
        "Diagnostic, and only valid at points that are reasonably moist."
    spline_info_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF data structures.

    xvf[:] = range(nlons_ndfd)
    yvf[:] = range(nlats_ndfd)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    lonsa[:] = lons[:,:]
    latsa[:] = lats[:,:]
    spline_info_inv_out[:] = spline_info_inv[:,:,:,:]
    spline_info_out[:] = spline_info[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma[:,:]
    
    # ---- close the netCDF file

    ncout.close()


    istat = 0
    return istat                                               ._CDF_fitting_forecast_precip.py                                                                    000775  000765  000024  00000000416 14016263124 017124  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `         129:364127                                                                                                                                                                                                                                                  CDF_fitting_forecast_precip.py                                                                      000775  000765  000024  00000037024 14016263124 016714  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip.py cmonth clead 

"""
import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
from netCDF4 import Dataset
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
import _pickle as cPickle
import scipy.stats as stats
#from numba import jit
base = importr('base')
mixtools = importr('mixtools')


rcParams['xtick.labelsize']='medium'
rcParams['ytick.labelsize']='medium'
rcParams['legend.fontsize']='medium'

# =====================================================================

def find_nearest(vec, value):
    """Return the index of the element of vec that lies closest to value."""
    # absolute distance of every element from the target; argmin returns
    # the first index at which that distance is smallest
    distance = np.abs(vec - value)
    nearest_index = distance.argmin()
    return nearest_index

# =====================================================================

def fraczero_possamps(nsamps, nxin, nyin, precip_ens):

    """
    Split a precipitation sample into its zero and positive parts.

    inputs:

    nsamps, nxin, nyin: accepted so existing callers keep working; they
        are not used inside this function.
    precip_ens: vector of precipitation amounts.

    returns:

    fraction_zero: fraction of the samples that are <= 0.0
    precip_ens_nonzero: the samples that are > 0.0, each perturbed by
        uniform random noise in [-0.005, 0.005) and then sorted ascending
    nz: number of samples that are > 0.0
    """

    # ---- censor at 0.0: every sample that is not strictly positive is
    #      counted as zero precipitation and removed.

    precip_ens_nonzero = np.delete(precip_ens, \
        np.where(precip_ens <= 0.0))
    nz = len(precip_ens_nonzero)

    # ---- data discretized, so add random component of this magnitude
    #      to break ties before sorting.
    #      NOTE(review): a positive sample smaller than 0.005 can be
    #      pushed below zero by this noise; callers taking logarithms of
    #      the result should be aware of that.

    precip_ens_nonzero = precip_ens_nonzero + \
        np.random.uniform(low=-0.005,high=0.005,size=nz)
    precip_ens_nonzero = np.sort(precip_ens_nonzero)

    ntotal = len(precip_ens)
    nzero = ntotal - nz
    fraction_zero = float(nzero) / float(ntotal)
    return fraction_zero, precip_ens_nonzero, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag):

    """ 
    Fit a single Gamma distribution to the positive precipitation samples
    and evaluate (Dnstat) how closely the fitted CDF matches the
    empirical CDF.

    inputs:  

    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, True if diagnostic printing desired

    returns:

    query_these_indices: list of the 19 indices into precip_ens_nonzero
        at which the distributions are compared
    empirical_CDF: query_these_indices divided by nz
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma shape and scale parameters
    i1: first position in the queried quantiles used for Dnstat
    Dnstat: largest absolute difference between the fitted and the
        empirical CDF over the queried quantiles from i1 upward
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector

    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20]
    if pflag == True: print ('   nz, query_these_indices = ', nz, query_these_indices)

    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is used as the dtype because the np.float
    #      alias no longer exists in current NumPy releases.

    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 

    if pflag == True: print ('   empirical_CDF = ',empirical_CDF)

    # ---- extract the quantiles at these cumulative probabilities

    empirical_precipvals = precip_ens_nonzero[query_these_indices] 

    if pflag == True: print ('   empirical_precipvals = ', empirical_precipvals)
    if pflag == True: print ('   precip_ens_nonzero[0:-1:10] = ',precip_ens_nonzero[0:-1:10] )

    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)

    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat

    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, the comparison starts at the queried quantile whose precip amount is
    #      closest to 1.0 mm, but no later than position 17 of the queried quantiles
    #      (the (9*nz)//10 entry), so at least the top two quantiles are always compared.

    if pflag == True: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag == True: print ('   fitted_CDF = ', fitted_CDF)
    if pflag == True: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))]) 
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    if pflag == True: print ('   Dnstat for 1 gamma mixture = ', Dnstat)

    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat



# =====================================================================
    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', etc.
clead = sys.argv[2] # 06, etc.

pflag = True # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- read the forecast precipitation samples and the 1-d lon/lat
#      vectors for this month and lead time.  The with-block closes the
#      file even if one of the reads fails.

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('before read ', current_time)    
ncfile = master_directory + cmonth + '_apcp' '_h' + clead + '.nc'
print (ncfile)
with Dataset(ncfile) as nc:
    precip_ens = nc.variables['apcp_fcst'][:,:,:]
    nsamps, nyin, nxin = np.shape(precip_ens)
    lons_1d = nc.variables['lons_fcst'][:]
    lats_1d = nc.variables['lats_fcst'][:]
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('after read ', current_time) 

# ---- output arrays: up to 3 Gamma mixture components per grid point,
#      plus the fraction of zero samples.  np.float64 replaces the
#      np.float alias, which no longer exists in current NumPy releases.

weights = np.zeros((3,nyin, nxin), dtype=np.float64)
alpha = np.zeros((3,nyin,nxin), dtype=np.float64)
beta = np.zeros((3,nyin,nxin), dtype=np.float64)
fzero = np.zeros((nyin, nxin), dtype=np.float64)

# ---- amount subtracted from every sample before censoring at zero

teeny_precip = 0.06*np.ones(nsamps)
        
def _store_single_gamma(jy, ix, alpha_hat, beta_hat):
    """Save a one-component Gamma fit into weights/alpha/beta at (jy, ix)."""
    weights[0,jy,ix] = 1.0
    alpha[0,jy,ix] = alpha_hat
    beta[0,jy,ix] = beta_hat
    # unused mixture components: zero weight, placeholder parameters
    weights[1:,jy,ix] = 0.0
    alpha[1:,jy,ix] = 1.0
    beta[1:,jy,ix] = 1.0


def _fit_gamma_mixture(k, jy, ix, precip_nonzero_R, empirical_precipvals, \
    empirical_CDF, i1):
    """
    Fit a k-component Gamma mixture with mixtools.gammamixEM, save it into
    weights/alpha/beta at (jy, ix), and return the Dn statistic of the fit
    along with the weight array and the alpha/beta array converted from R.
    """
    result_R = mixtools.gammamixEM(precip_nonzero_R, k=k, maxit=1000)
    # element 1 of the result is used as the mixture weights; element 2
    # is used as a 2-row array with alpha in row 0 and beta in row 1.
    result_weights_np = np.asarray(result_R[1])
    result_alpha_beta_np = np.asarray(result_R[2])
    weights[0:k,jy,ix] = result_weights_np[:]
    alpha[0:k,jy,ix] = result_alpha_beta_np[0,:]
    beta[0:k,jy,ix] = result_alpha_beta_np[1,:]
    # components beyond k (if any): zero weight, placeholder parameters
    weights[k:,jy,ix] = 0.0
    alpha[k:,jy,ix] = 1.0
    beta[k:,jy,ix] = 1.0
    print ('   weights = ', weights[:,jy,ix])
    print ('   alpha = ', alpha[:,jy,ix])
    print ('   beta = ', beta[:,jy,ix])

    # mixture CDF = weighted sum of the component Gamma CDFs
    fitted_CDF = 0.0
    for m in range(k):
        fitted_CDF = fitted_CDF + weights[m,jy,ix] * \
            stats.gamma.cdf(empirical_precipvals / beta[m,jy,ix], alpha[m,jy,ix])
    print ('   fitted_CDF = ',fitted_CDF)
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   np.abs(fitted_CDF - empirical_CDF) = ', np.abs(fitted_CDF - empirical_CDF))
    print ('   Dnstat for ' + str(k) + ' gamma mixture = ', Dnstat)
    return Dnstat, result_weights_np, result_alpha_beta_np


tktr = 0
for jy in range(0,nyin,5):
    for ix in range(0,nxin,5):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1
        print ('*** time, jy, ix, lon, lat = ',current_time, jy, ix, tktr, nyin*nxin, lons_1d[ix], lats_1d[jy])
        print ('   Before fraczero_possamps, jy,ix, nsamps, time = ', jy,ix, nxin*nyin, current_time)
        # there is a grib compaction error that can give negative values slightly smaller than teeny precip.  
        # to make sure that we don't have either negative values or lots of the same tiny values, subtract
        # off teeny_precip
        precip_ens_1d = precip_ens[:,jy,ix] - teeny_precip[:]
        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, nxin, nyin, precip_ens_1d) # return sorted
        fzero[jy,ix] = fraction_zero 
        print ('   After fraczero_possamps, jy,ix = ', jy,ix, nxin*nyin, current_time)
        print ('   number of samples with positive precip = ', nz)
        print ('   precip_ens_nonzero[-20:] = ', precip_ens_nonzero[-20:])
        if nz > 21 and precip_ens_nonzero[-1] > 3.0:

            # --- first fit a Gamma distribution per Wilks; determine the quantiles every 1/20th.
            #     The returned values are needed below, so they must be captured here.

            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag)

            # ---- decide if Dn excessive.   If so, then try a Gamma mixture model.

            excessive_threshold = 1.0 / np.sqrt(float(nz))   # inspired by Wilks table 5.2, assume alpha=1
            if Dnstat > excessive_threshold:

                try:

                    # ---- try 2-component Gamma mixture

                    now = datetime.now()
                    current_time = now.strftime("%H:%M:%S")
                    print ('   Dnstat excessive!  try Gamma mixture. ', Dnstat, current_time)
                    precip_nonzero_R = robjects.FloatVector(precip_ens_nonzero)
                    Dnstat, result_weights_np, result_alpha_beta_np = \
                        _fit_gamma_mixture(2, jy, ix, precip_nonzero_R, \
                        empirical_precipvals, empirical_CDF, i1)
                    print ('   weights = ', result_weights_np[:])
                    print ('   result_alpha_beta_np[0,:] = ', result_alpha_beta_np[0,:])
                    print ('   result_alpha_beta_np[1,:] = ', result_alpha_beta_np[1,:])
                    if Dnstat > excessive_threshold:
                        print ('   need to try 3 gamma mixture')
                        Dnstat, result_weights_np, result_alpha_beta_np = \
                            _fit_gamma_mixture(3, jy, ix, precip_nonzero_R, \
                            empirical_precipvals, empirical_CDF, i1)

                except Exception: # bombed off with 2-gamma mixture.  Try three.

                    # Exception (rather than a bare except) so that
                    # Ctrl-C still stops the run.
                    try:
                        Dnstat, result_weights_np, result_alpha_beta_np = \
                            _fit_gamma_mixture(3, jy, ix, precip_nonzero_R, \
                            empirical_precipvals, empirical_CDF, i1)
                    except Exception: # bombed off with 3-parameter.   Revert to one.
                        _store_single_gamma(jy, ix, alpha_hat, beta_hat)

            else:
                _store_single_gamma(jy, ix, alpha_hat, beta_hat)

        else: 

            # --- very few positive samples; fit a simple maximum likelihood distribution
            #     (Thom estimator per the Wilks text) to the positive samples only, since the
            #     logarithm is undefined for the zero/negative censored values.  If there are
            #     not enough distinct positive samples for the estimator to be defined,
            #     fall back to the default alpha = 1, beta = 100.

            alpha_hat = 1.0
            beta_hat = 100.0
            if nz > 1:
                with np.errstate(invalid='ignore', divide='ignore'):
                    pmean = np.mean(precip_ens_nonzero)
                    lnxbar = np.log(pmean)
                    meanlnxi = np.mean(np.log(precip_ens_nonzero))
                    D = lnxbar - meanlnxi
                # D must be positive and finite for the estimator to be usable
                if np.isfinite(D) and D > 0.0:
                    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                    beta_hat = pmean / alpha_hat
            _store_single_gamma(jy, ix, alpha_hat, beta_hat)

# --- save to cPickle file.  The four arrays are dumped one after another
#     into the same file, so a reader must load them in this same order:
#     weights, alpha, beta, fzero.  The with-block closes the file even
#     if one of the dumps raises.

outfile = master_directory + cmonth+ '_apcp_gamma_parameters_h' + clead + '.cPick'
print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)



                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            ._CDF_fitting_forecast_precip_1point.py                                                             000775  000765  000024  00000000501 14016263124 020411  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       A                                      ATTR      A     )                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      =     com.macromates.visibleIndex  `    T     ( '359' )151-152:48+44126                                                                                                                                                                                               CDF_fitting_forecast_precip_1point.py                                                               000775  000765  000024  00000072655 14016263124 020217  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        """
CDF_fitting_forecast_precip_1point.py cmonth clead cjy cix

this python script is designed to fit a mixture of Gamma CDFs to closely
match an empirical distribution of precipitation.   Up to 3 Gamma distributions
can be mixed, though 1 or 2 is preferable if they provide a sufficiently
close fit.   The script is tailored to GEFSv12 forecasts over one of the
National Digital Forecast Database domains for the National Blend of
Models

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def set_domain_boundaries(cdomain):

    """ return the bounding indices (jmin, jmax, imin, imax) of the 
        0.25 GEFSv12 reforecast data that encompass the named domain
        ('conus', 'pr', or 'ak').  The values were determined from the
        bounding lat and lon in a grib file of the 2.5-km blend output
        grid.  Any other domain name prints a message and exits.
    """

    # (domain name, (jmin, jmax, imin, imax)), checked in this order
    known_domains = (
        ('conus', (93, 246, 368, 686)),
        ('pr', (243, 256, 649, 667)),
        ('ak', (19, 161, 201, 967)),
    )

    for domain_name, bounds in known_domains:
        if cdomain == domain_name:
            return bounds

    print ('invalid domain.  Exiting.')
    sys.exit()
 
# =====================================================================

def find_nearest(vec, value):

    """ return the index of the entry of vector vec whose value lies
    closest to the requested value"""

    # absolute distance of every entry from the target; argmin returns
    # the first index at which that distance is smallest
    distance = np.abs(vec - value)
    nearest_index = distance.argmin()
    return nearest_index

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):

    """

    Separate the vector input sample precip_ens into its zero and
    positive parts.  Samples at or below 0.006 mm count as zero; their
    fraction of the whole sample is returned as fraction_zero.  The
    remaining positive samples get a small uniform random perturbation
    (the data was discretized to ~0.1 mm, and ties are unwanted when
    CDFs are built later), are sorted, and are returned together with
    their count nz.  nsamps is not used here.

    """

    # ---- drop everything at or below the 0.006 mm censoring threshold

    censored = np.where(precip_ens <= 0.006)
    wet_samples = np.delete(precip_ens, censored)
    nz = len(wet_samples)

    # ---- break ties among the discretized amounts, then sort ascending

    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz)
    wet_samples = np.sort(wet_samples + jitter)

    # ---- fraction of the full sample that was censored as zero

    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - nz) / float(ntotal)
    return fraction_zero, wet_samples, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Fit a single Gamma distribution to the nonzero precipitation samples
    and return the fitted alpha and beta values along with an evaluation 
    (Dnstat) of how closely the fitted CDF matches the empirical CDF.   
    Dnstat follows the Dn statistics discussed in Wilks (2011) Statistical 
    Methods in the Atmospheric Sciences text for distribution fitting. 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples that are zero (not used here)
    
    returns:
    
    query_these_indices: list of 23 indices into precip_ens_nonzero 
        at which the empirical CDF is sampled
    empirical_CDF: query_these_indices / nz, as a float array
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma shape and scale
    i1: first index of the quantiles used when computing Dnstat
    Dnstat: max abs difference between fitted and empirical CDF from i1 on
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, 
    #      + a few others at high quantiles.   
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
    if pflag == True: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- extract the quantiles at these indices
    
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      Use the builtin float; the np.float alias no longer exists in 
    #      current numpy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag == True: print ('   empirical_CDF = ',empirical_CDF)
    if pflag == True: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi  # log of the mean minus mean of the logs
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      beta_hat is applied as a scale: the data are divided by it before
    #      evaluating the unit-scale Gamma CDF with shape alpha_hat.
    
    if pflag == True: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag == True: print ('   fitted_CDF = ', fitted_CDF)
    if pflag == True: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    # --- because errors in fit are largely irrelevant for the quantile mapping 
    #     at very small values, only evaluate the Dn statistic from the queried 
    #     quantile nearest 1.0 mm onward, or from index 17 (the 35/40th quantile) 
    #     onward, whichever index is smaller.
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride, \
    Dnstat2, excessive_threshold):
    
    """
    
    Call the gamma mixture routine gammamix_em (defined outside this block) 
    to estimate the weights and parameters of a mixture of two Gamma 
    distributions.  Evaluate how well it fits the empirical data and return 
    weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : 0.05, 0.1, ... 0.95 empirical quantiles
    empirical_CDF: roughly 0.05, 0.1, ... 0.95 but subject to number of samples
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas
    beta: input and output array of Gamma fitted betas
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction (not used here)
    Dnstat2: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    two_parameter_fail: True if an exception was raised during the fit
    weights, alpha, beta: the input arrays, updated in place at [:,jy,ix]
    Dnstat2: max abs difference between fitted and empirical CDF from 
        index i1 on; set to 0.10 if the fit failed
    weight_save_2m, alpha_save_2m, beta_save_2m: the newly fitted values, 
        or the input values unchanged if the failure occurred before
        they were reassigned
    nmixture: the input array, with [jy,ix] set to 2 once the fitted 
        parameters have been stored
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the R-inspired routine that estimates the weights and a mixture 
        #     of two Gamma distributions. 
        
        if pflag == True: print ('calling gammamix_em 2 parameter')
        if pflag == True: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('weight_save_2m = ', weight_save_2m)
        if pflag == True: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag == True: print ('beta_save_2m = ', beta_save_2m)
        
        # --- use the saved parameters as first guess only when this is not
        #     the first point (ix == 0) and the previous fit was not too 
        #     poor; otherwise it is better to restart with new values.
        
        warm_start = (ix != 0) and (Dnstat2 < excessive_threshold*1.5)
        if warm_start:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_2m, alpha=alpha_save_2m, invbeta=beta_save_2m,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: 
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        # --- store the two fitted components; the unused third component 
        #     gets zero weight and placeholder parameters of 1.0
        #     NOTE(review): the fitted `invbeta` values are stored in beta and 
        #     used below as a divisor of the data, i.e. as a scale parameter; 
        #     confirm this matches gammamix_em's definition of invbeta.
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err: # something went wrong in the fit or its evaluation
        # only Exception is caught, so that Ctrl-C (KeyboardInterrupt) and 
        # sys.exit() still stop the program instead of counting as a failed fit.
        print ('   two_parameter_fail')
        if pflag == True: print ('   two_parameter_gamma error: ', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride, mop_up,\
    Dnstat3, excessive_threshold):    
    
    """
    
    Call the gamma mixture routine gammamix_em (defined outside this block) 
    to estimate the weights and parameters of a mixture of three Gamma 
    distributions.  Evaluate how well it fits the empirical data and return 
    weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : 0.05, 0.1, ... 0.95 empirical quantiles
    empirical_CDF: roughly 0.05, 0.1, ... 0.95 but subject to number of samples
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction (not used here)
    mop_up: flag for whether we're having a 2nd go at things to address points
        where first time through, no good fit.
    Dnstat3: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    three_parameter_fail: True if an exception was raised during the fit
    weights, alpha, beta: the input arrays, updated in place at [:,jy,ix]
    Dnstat3: max abs difference between fitted and empirical CDF from 
        index i1 on; set to 0.10 if the fit failed
    weight_save_3m, alpha_save_3m, beta_save_3m: the newly fitted values, 
        or the input values unchanged if the failure occurred before
        they were reassigned
    nmixture: the input array, with [jy,ix] set to 3 only when the whole
        fit and evaluation completed
    
    """

    three_parameter_fail = False # in case routine bombs off
    try:    
    
        if pflag == True: print ('   Need to try 3 gamma mixture.')
        
        # --- call the routine that estimates the weights and a mixture 
        #     of 3 Gamma distributions. 
        
        if pflag == True: print ('   calling gammamix_em 3 parameter')
        if pflag == True: print ('   precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('   weight_save_3m = ', weight_save_3m)
        if pflag == True: print ('   alpha_save_3m = ', alpha_save_3m)
        if pflag == True: print ('   beta_save_3m = ', beta_save_3m)    
            
        if ix == 0 or mop_up == True:
            # first point, or second attempt: no first guess, and a tighter 
            # tolerance with more iterations than the other two branches
            print ('   resetting 3-parameter Gamma mixture')
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.001, maxit=100, maxrestarts=20, verb=False)
        elif Dnstat3 < excessive_threshold*1.5:
            # previous fit was acceptable; use saved values as first guess
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_3m, alpha=alpha_save_3m, invbeta=beta_save_3m,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: 
            # previous fit was poor; restart without a first guess
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False) 
         
        # --- store the three fitted components.
        #     NOTE(review): the fitted `invbeta` values are stored in beta and 
        #     used below as a divisor of the data, i.e. as a scale parameter; 
        #     confirm this matches gammamix_em's definition of invbeta.
        
        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])
                
        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err: # something went wrong in the fit or its evaluation
        # only Exception is caught, so that Ctrl-C (KeyboardInterrupt) and 
        # sys.exit() still stop the program instead of counting as a failed fit.
        print ('   three_parameter_fail')
        if pflag == True: print ('   three_parameter_gamma error: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10    
    
    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ 
    Pick the 1-, 2-, or 3-component Gamma mixture whose Dn statistic is 
    lowest and copy its saved parameters into weights, alpha, beta and 
    nmixture at grid point [jy,ix].  
    
    Components the chosen mixture does not use get weight 0.0 and 
    alpha = beta = 1.0.  For the single Gamma the weight is set to 1.0 
    directly; weight_save_1m is not read.  The four arrays are modified 
    in place and also returned.
    """

    Dnstats = np.array([Dnstat1, Dnstat2, Dnstat3])
    print ('   Dnstats 1,2,3  = ', Dnstats)
    ncomponents = np.argmin(Dnstats) + 1  # position of the lowest Dn, counted from 1
    
    if ncomponents == 3:
        if pflag == True: print ('selected 3-Gamma mixture')
        weights[:,jy,ix] = weight_save_3m[:]
        alpha[:,jy,ix] = alpha_save_3m[:]
        beta[:,jy,ix] = beta_save_3m[:]
        nmixture[jy,ix] = 3
        return weights, alpha, beta, nmixture
        
    if ncomponents == 2:
        if pflag == True: print ('selected 2-Gamma mixture')
        # third component unused
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        weights[0:2,jy,ix] = weight_save_2m[:]
        alpha[0:2,jy,ix] = alpha_save_2m[:]
        beta[0:2,jy,ix] = beta_save_2m[:]
        nmixture[jy,ix] = 2
        return weights, alpha, beta, nmixture
        
    if ncomponents == 1:
        if pflag == True: print ('selected 1-Gamma mixture')
        # second and third components unused
        weights[1:,jy,ix] = 0.0
        alpha[1:,jy,ix] = 1.0
        beta[1:,jy,ix] = 1.0
        weights[0,jy,ix] = 1.0
        alpha[0,jy,ix] = alpha_save_1m
        beta[0,jy,ix] = beta_save_1m
        nmixture[jy,ix] = 1
        
    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.
cjy = sys.argv[3] # y grid index (within the domain subset) of the point to fit
cix = sys.argv[4] # x grid index (within the domain subset) of the point to fit
jyin = int(cjy)
ixin = int(cix)
cdomain = 'conus'

jmin, jmax, imin, imax = set_domain_boundaries(cdomain)

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.
   
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag == True: print (ncfile)
nc = Dataset(ncfile)
precip_ens = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
nsamps, nyin, nxin = np.shape(precip_ens)
lons_1d = nc.variables['lons_fcst'][imin:imax]
lats_1d = nc.variables['lats_fcst'][jmin:jmax]
nc.close()

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float and int are 
#      used as dtypes; the np.float and np.int aliases no longer 
#      exist in current numpy releases.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)
Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
#      The *_save_* values below are only initial placeholders for 
#      the first-guess mixture parameters.
        
tktr = 0
weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # placeholder values
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_3mop = 0.33333*np.ones((3), dtype=float) # placeholder values
alpha_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
nstride = 1
Dnstat1 = 0.10
Dnstat2 = 0.10
Dnstat3 = 0.10

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")


# ---- process the single row jyin, stepping in x from 0 up to and 
#      including ixin, so that each mixture fit can use the parameters 
#      saved from the previous point as its first guess.

for jy in range(jyin, jyin+1):
        
    for ix in range (0,ixin+1):
        
        mop_up = False
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('***** begin time, current time, jy, ix, nyin, nxin, lon, lat = ',\
            begin_time, current_time, jy, ix, nyin, nxin, lons_1d[ix], lats_1d[jy])
            
        # ---- threshold above which a Dn statistic is judged excessive.
        #      excessive_threshold inspired by Wilks textbook, Table 5.2, 
        #      assume for simplicity alpha=1 since there is only a weak 
        #      dependence on alpha.  Set here, ahead of the branches, so 
        #      that it is defined at every grid point.
        
        excessive_threshold = 0.025
            
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        #      NOTE(review): the min of an absolute value and 0.0 is 0.0, 
        #      so as written tp is 0.0 and this subtraction changes 
        #      nothing; confirm what offset was intended.
        
        precip_ens_1d = precip_ens[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag == True: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag == True: print ('   precip_ens_nonzero[-400:-1] = ',\
            precip_ens_nonzero[-400:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag == True: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag == True: print ('   number of samples with positive precip = ', nz)
        if pflag == True: print ('   precip_ens_nonzero[-10:] = ', \
            precip_ens_nonzero[-20:])
            
        # ---- the full fitting procedure is only attempted with more than 
        #      40 nonzero samples whose largest value exceeds 2.0 
        
        enough_precip = nz > 40 and precip_ens_nonzero[-1] > 2.0
        if enough_precip:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras.
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  
            
            if pflag == True: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride, \
                    Dnstat2, excessive_threshold)
                Dnstat2a[jy,ix] = Dnstat2
                if pflag == True:
                    print ('   two_parameter_fail, Dnstat2 = ', two_parameter_fail, Dnstat2)
                    
                if two_parameter_fail == True or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride, mop_up, \
                        Dnstat3, excessive_threshold)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail == True or Dnstat3 > excessive_threshold: 
                        
                        # ---- none met the threshold; keep the best of the three
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                            jy, ix, weights, alpha, beta, nmixture, pflag, \
                            Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                            weight_save_1m, alpha_save_2m, beta_save_2m, \
                            weight_save_2m, alpha_save_3m, beta_save_3m, \
                            weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit a simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag == True: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # mean of the logs, the same estimator as one_parameter_gamma;
                # this previously took the sum of the logs instead.
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1

        # ----- sometimes, by using the previous grid point's estimate as a prior mixture, it can
        #       go off into non-sensical parameter space.   Check for this, and if so, initiate 
        #       a final 3-parameter mixture without using prior grid point's saved values.
        #       Only done where the fitting procedure above was attempted 
        #       (enough_precip): the Dnstat arrays start at 0.10, above the 
        #       threshold, and empirical_precipvals, empirical_CDF and i1 are 
        #       not computed at light-precipitation points.

        if enough_precip and \
        Dnstat1a[jy,ix] > excessive_threshold and \
        Dnstat2a[jy,ix] > excessive_threshold and \
        Dnstat3a[jy,ix] > excessive_threshold:
        
            mop_up = True
            print ('   mopping up! ')
            three_parameter_fail, weights, alpha, beta, Dnstat3mop,\
                weight_save_3mop, alpha_save_3mop, beta_save_3mop, nmixture = \
                three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                empirical_precipvals, empirical_CDF, weights, \
                alpha, beta, i1, pflag, weight_save_3mop, alpha_save_3mop, \
                beta_save_3mop, nmixture, fraction_zero, nstride, mop_up, \
                Dnstat3, excessive_threshold)

            # ---- keep the mop-up fit only if it beats the first 3-mixture fit
            
            if Dnstat3mop < Dnstat3a[jy,ix]:
                Dnstat3 = Dnstat3mop
                weight_save_3m = weight_save_3mop
                alpha_save_3m = alpha_save_3mop
                beta_save_3m = beta_save_3mop
                                
            weights, alpha, beta, nmixture = decide_which_mixture(\
                jy, ix, weights, alpha, beta, nmixture, pflag, \
                Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                weight_save_1m, alpha_save_2m, beta_save_2m, \
                weight_save_2m, alpha_save_3m, beta_save_3m, \
                weight_save_3m)
            
# --- save to cPickle file.  The values are those left over from the 
#     last grid point processed in the loop above.

outfile = cmonth+'_'+cdomain+'_jy'+cjy+'_ix'+cix+\
    '_singlepoint_apcp_gamma_parameters_h' + clead + '.cPick'

print (jy,ix)
print ('writing to ', outfile)

# --- each item is dumped as its own pickle record, so a reader must 
#     load them back one at a time in this same order.

items_to_save = (fraction_zero, \
    weight_save_1m, alpha_save_1m, beta_save_1m, \
    weight_save_2m, alpha_save_2m, beta_save_2m, \
    weight_save_3m, alpha_save_3m, beta_save_3m, \
    Dnstat1, Dnstat2, Dnstat3, \
    precip_ens_nonzero, lons_1d[ix], lats_1d[jy])

# --- the with-statement closes the file even if a dump raises

with open(outfile, 'wb') as ouf:
    for item in items_to_save:
        cPickle.dump(item, ouf)



                                                                                   ._CDF_fitting_forecast_precip_v2.py                                                                 000775  000765  000024  00000000416 14016263124 017533  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    v     240:639814                                                                                                                                                                                                                                                  CDF_fitting_forecast_precip_v2.py                                                                   000775  000765  000024  00000047301 14016263124 017322  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip_v2.py cmonth clead 

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
import _pickle as cPickle
import scipy.stats as stats

# ---- load R packages through rpy2's importr.  mixtools supplies the
#      gammamixEM routine called in two_parameter_gamma.

base = importr('base')
mixtools = importr('mixtools')

# =====================================================================

def find_nearest(vec, value):
    """ 
    return the index of the entry of vec whose absolute difference 
    from value is smallest
    """
    abs_difference = np.abs(vec-value)
    return abs_difference.argmin()

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    """
    from the vector input sample precip_ens, define the fraction of
    samples with zero precipitation.   For the positive samples, add
    a small random number to deal with the fact that the data was 
    discretized to 0.1 mm, so that when later creating CDFs we don't 
    have values with lots of tied amounts.   Sort the nonzero amounts 
    and return.
    
    inputs:
    
    nsamps: number of samples.  Not used; the sample count is taken 
        from len(precip_ens).  Kept so existing callers still work.
    precip_ens: 1-d vector of precipitation amounts
    
    returns:
    
    fraction_zero: fraction of samples that are <= 0.0
    precip_ens_nonzero: sorted vector of the samples > 0.0, each
        perturbed by uniform noise in [-0.005, 0.005)
    nz: number of entries in precip_ens_nonzero
    
    raises ZeroDivisionError if precip_ens is empty.
    """
    
    # ---- remove the samples that are zero or negative
    
    precip_ens_nonzero = np.delete(precip_ens, \
        np.where(precip_ens <= 0.0))  # censor at 0.0
    nz = len(precip_ens_nonzero)
    
    # ---- data discretized, so add random component of this magnitude
    #      NOTE(review): a retained value of 0.005 or less could become 
    #      non-positive after this perturbation; presumably the 0.1 mm 
    #      discretization rules that out, but confirm.
    
    precip_ens_nonzero = precip_ens_nonzero + \
        np.random.uniform(low=-0.005,high=0.005,size=nz) 
    precip_ens_nonzero = np.sort(precip_ens_nonzero)  
    
    # ---- the samples that were censored make up the zero fraction
    
    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - nz) / float(ntotal)
    return fraction_zero, precip_ens_nonzero, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag):

    """ 
    Fit a single Gamma distribution to the nonzero precipitation samples
    and return the fitted alpha and beta values, along with an evaluation 
    (Dnstat) of how closely the fitted CDF matches the empirical CDF 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    
    returns:
    
    query_these_indices: list of 23 indices into precip_ens_nonzero 
        at which the empirical CDF is sampled
    empirical_CDF: query_these_indices / nz, as a float array
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma shape and scale
    i1: first index of the quantiles used when computing Dnstat
    Dnstat: max abs difference between fitted and empirical CDF from i1 on
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector,
    #      plus a few others at high quantiles
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
    if pflag == True: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      Use the builtin float; the np.float alias no longer exists in 
    #      current numpy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag == True: print ('   empirical_CDF = ',empirical_CDF)
    
    # ---- extract the quantiles at these cumulative probabilities
    
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    
    if pflag == True: print ('   empirical_precipvals = ', empirical_precipvals)
    if pflag == True: print ('   precip_ens_nonzero[0:-1:10] = ',precip_ens_nonzero[0:-1:10] )
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi  # log of the mean minus mean of the logs
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very 
    #      small values, the goodness of fit is evaluated only from index i1 onward: 
    #      the queried quantile nearest 1.0 mm, or index 17 (the 35/40th quantile), 
    #      whichever index is smaller.   
    
    if pflag == True: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag == True: print ('   fitted_CDF = ', fitted_CDF)
    if pflag == True: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))]) 
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    if pflag == True: print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag):
    
    """
    
    Call the R routine mixtools.gammamixEM (through rpy2) to estimate the 
    weights and parameters of a mixture of two Gamma distributions.  Store 
    the result at grid point [jy, ix], evaluate how well the mixture fits 
    the empirical data, and return weights, parameters, and a fitting 
    statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : empirical quantiles at which the fit is evaluated
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas (shape parameters)
    beta: input and output array of Gamma fitted betas (used as scale: 
        the CDF is evaluated at precip / beta)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    
    returns:
    two_parameter_fail: True if anything in the fit/evaluation raised
    weights, alpha, beta: the input arrays, modified in place at [:, jy, ix]
    Dnstat: max abs difference between fitted and empirical CDF from index
        i1 onward; set to the placeholder 0.10 when the fit failed.
    
    Relies on the module-level names robjects, mixtools, nxin and nyin.
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the R routine that estimates the weights and a mixture 
        #     of two Gamma distributions. Must convert back from R data format..
        
        precip_nonzero_R = robjects.FloatVector(precip_ens_nonzero)
        result_R = mixtools.gammamixEM(precip_nonzero_R, k=2, maxit=1000 )
        if pflag:
            current_time = datetime.now().strftime("%H:%M:%S")
            print ('   2 Gammas. After mixtools.gammamixEM, jy,ix = ', \
                jy,ix, nxin*nyin, current_time)
        
        # --- element 1 of the R result holds the mixture weights; element 2 
        #     is a 2 x k array with alphas in row 0 and betas in row 1.
        
        result_weights_np = np.asarray(result_R[1])
        result_alpha_beta_np = np.asarray(result_R[2])
        weights[0:2,jy,ix] = result_weights_np[:]
        alpha[0:2,jy,ix] = result_alpha_beta_np[0,:]
        beta[0:2,jy,ix] = result_alpha_beta_np[1,:]
        
        # --- the unused third mixture component gets zero weight and 
        #     benign (nonzero) parameters.
        
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 2 gamma mixture = ', Dnstat)
        if pflag: print ('   weights = ', result_weights_np[:])
        if pflag: print ('   result_alpha_beta_np[0,:] = ', \
            result_alpha_beta_np[0,:])
        if pflag: print ('   result_alpha_beta_np[1,:] = ', \
            result_alpha_beta_np[1,:])
    except Exception:
        # Any fitting/evaluation error is reported through the flag so the 
        # caller can fall back.  Exception (not a bare except) so that 
        # Ctrl-C / SystemExit still stop a long grid loop.
        two_parameter_fail = True
        Dnstat = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save, alpha_save, \
    beta_save):    
    
    """
    
    Call the R routine mixtools.gammamixEM (through rpy2) to estimate the 
    weights and parameters of a mixture of three Gamma distributions, using 
    weight_save / alpha_save / beta_save as the starting values.  Store the 
    result at grid point [jy, ix], evaluate how well it fits the empirical
    data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : empirical quantiles at which the fit is evaluated
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas (used as scale: 
        the CDF is evaluated at precip / beta)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save: first guess for weights
    alpha_save: first guess for alpha
    beta_save: first guess for beta
    
    returns:
    three_parameter_fail: True if anything in the fit/evaluation raised
    weights, alpha, beta: the input arrays, modified in place at [:, jy, ix]
    Dnstat: max abs difference between fitted and empirical CDF from index
        i1 onward; set to the placeholder 0.10 when the fit failed.
    
    Relies on the module-level names robjects and mixtools.
    
    """

    three_parameter_fail = False # in case routine bombs off
    try:    
    
        if pflag: print ('   Need to try 3 gamma mixture.')
        
        # --- call the R routine that estimates the weights and a mixture 
        #     of 3 Gamma distributions. Must convert back from R data format.
        
        precip_nonzeroR = robjects.FloatVector(precip_ens_nonzero)
        weightsR = robjects.FloatVector(weight_save)
        alphaR = robjects.FloatVector(alpha_save)
        betaR = robjects.FloatVector(beta_save)
        
        # --- the R argument holding the initial mixing proportions is named 
        #     "lambda", which is a reserved word in Python and cannot be 
        #     written as a keyword argument; pass it via dict unpacking.
        
        result_R = mixtools.gammamixEM(precip_nonzeroR, alpha=alphaR, \
            beta=betaR, k=3, maxit=1000, **{'lambda': weightsR})
        
        # --- element 1 of the R result holds the mixture weights; element 2 
        #     is a 2 x k array with alphas in row 0 and betas in row 1.
        
        result_weights_np = np.asarray(result_R[1])
        result_alpha_beta_np = np.asarray(result_R[2])
        weights[:,jy,ix] = result_weights_np[:]
        alpha[:,jy,ix] = result_alpha_beta_np[0,:]
        beta[:,jy,ix] = result_alpha_beta_np[1,:]
        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 3 gamma mixture = ', Dnstat)
    except Exception:
        # Any fitting/evaluation error is reported through the flag so the 
        # caller can fall back.  Exception (not a bare except) so that 
        # Ctrl-C / SystemExit still stop a long grid loop.
        three_parameter_fail = True
        Dnstat = 0.10    
    
    return three_parameter_fail, weights, alpha, beta, Dnstat
    
# =====================================================================
# =====================================================================
# =====================================================================

    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.

# ---- set parameters

pflag = True # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag: print ('before read ', current_time)    
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag: print (ncfile)
nc = Dataset(ncfile)
precip_ens = nc.variables['apcp_fcst'][:,:,:]
nsamps, nyin, nxin = np.shape(precip_ens)
lons_1d = nc.variables['lons_fcst'][:]
lats_1d = nc.variables['lats_fcst'][:]
nc.close()
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag: print ('after read ', current_time) 

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float is used as the 
#      dtype because the np.float alias no longer exists in current NumPy.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
teeny_precip = 0.06*np.ones(nsamps) 
weight_save = np.zeros((3), dtype=float)
alpha_save = np.zeros((3), dtype=float)
beta_save = np.zeros((3), dtype=float)
   
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
        
tktr = 0
#for jy in range(0,nyin,5):
#    for ix in range(0,nxin,5):

# NOTE(review): the loop is currently restricted to the single grid point
# (jy=200, ix=600); the full-grid loop is the commented-out pair above.
        
for jy in range(200,201):
    for ix in range(600,601):

        
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('jy, ix = ', jy, ix, nyin, nxin, current_time)
        tktr = tktr+1  # number of grid points processed
        if pflag: print ('*** time, jy, ix, lon, lat = ',\
            current_time, jy, ix, tktr, nyin*nxin, lons_1d[ix], lats_1d[jy])
        if pflag: print ('   Before fraczero_possamps, jy,ix, nsamps, time = ', \
            jy, ix, nsamps, current_time)
            
        # ---- there is a grib compaction error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        
        precip_ens_1d = precip_ens[:,jy,ix] - teeny_precip[:]
        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        fzero[jy,ix] = fraction_zero 
        if pflag: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag: print ('   number of samples with positive precip = ', nz)
        if pflag: print ('   precip_ens_nonzero[-20:] = ', \
            precip_ens_nonzero[-20:])
        if nz > 21 and precip_ens_nonzero[-1] > 3.0:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95)
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag)
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  excessive_threshold inspired 
            #      by Wilks textbook, Table 5.2, assume for simplicity alpha=1
            #      since there is only a weak dependence on alpha.
            
            excessive_threshold = 0.3 / np.sqrt(float(nz))   
            if Dnstat > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag)
                    
                if two_parameter_fail == True or Dnstat > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save, alpha_save, \
                        beta_save)
                    weight_save[:] = weights[:,jy,ix]
                    alpha_save[:] = alpha[:,jy,ix]
                    beta_save[:] = beta[:,jy,ix]
                    if three_parameter_fail == True: # bombed off w. 3-param.  
                        weights[0,jy,ix] = 1.0       # Revert to one parameter
                        alpha[0,jy,ix] = alpha_hat
                        beta[0,jy,ix] = beta_hat
                        weights[1:,jy,ix] = 0.0
                        alpha[1:,jy,ix] = 1.0
                        beta[1:,jy,ix] = 1.
                    
            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
            
        else: 
            
            # --- very few positive samples; fit a simple maximum likelihood 
            #     distribution to single mode Gamma distribution.  pmean is 
            #     computed here (not taken from an earlier grid point) so the
            #     diagnostic print below never sees an undefined or stale value.
            
            pmean = np.mean(precip_ens_nonzero) if nz > 0 else 0.0
            if pflag: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                lnxbar = np.log(pmean)
                # Thom's D statistic needs the MEAN of the logs (as in 
                # one_parameter_gamma), not their sum.
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag: print('   lnxbar, meanlnxi = ',lnxbar, meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
                
            # --- store the single-Gamma parameters for both cases above, so 
            #     the nz < 2 defaults are actually written to the output arrays.
            
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0

# --- save to cPickle file; the with-block closes the file even if a dump fails.

#outfile = master_directory + cmonth+ '_apcp_gamma_parameters_h' + clead + '.cPick'
outfile = master_directory + cmonth+ '_apcp_gamma_parameters_v2_h' + clead + '.cPick'
print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)



                                                                                                                                                                                                                                                                                                                               ._CDF_fitting_forecast_precip_v3.py                                                                 000775  000765  000024  00000000474 14016263124 017540  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  
     <                                      ATTR      <     $                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      7     com.macromates.visibleIndex  `    *     ( '288' )367:4219376                                                                                                                                                                                                    CDF_fitting_forecast_precip_v3.py                                                                   000775  000765  000024  00000077234 14016263124 017333  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip_v3.py cmonth clead 

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def find_nearest(vec, value):
    """
    Return the index of the element of vec that is closest to value 
    (the first such index when there is a tie).
    """
    distance = np.abs(vec - value)
    nearest_index = distance.argmin()
    return nearest_index

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    """
    Split the input sample vector precip_ens into "zero" and positive 
    amounts and return (fraction_zero, precip_ens_nonzero, nz).
    
    Samples at or below 0.006 mm are censored (counted as zero).  Each 
    remaining positive sample is perturbed by a uniform random number in 
    [-0.005, 0.005) so that discretized data does not produce lots of tied 
    amounts when CDFs are built later; the perturbed samples are returned 
    sorted in ascending order.  nz is the number of positive samples and
    fraction_zero is the censored share of all samples.
    
    nsamps is accepted but not used; the sample count is taken from 
    precip_ens itself.
    """
    ntotal = len(precip_ens)
    
    # censor at 0.006 mm
    censored = np.where(precip_ens <= 0.006)
    wet_samples = np.delete(precip_ens, censored)
    nz = len(wet_samples)
    
    # data discretized, so add random component of this magnitude
    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz)
    precip_ens_nonzero = np.sort(wet_samples + jitter)
    
    nzero = ntotal - nz
    fraction_zero = float(nzero) / float(ntotal)
    return fraction_zero, precip_ens_nonzero, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Along with some other ancillary stuff, return the fitted Gamma distribution
    alpha and beta values, along with an evaluation (Dnstat) of how 
    closely the fitted CDF matches the empirical CDF 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples with zero precip.  Currently unused; 
        a variant that scaled Dnstat by (1 - fraction_zero) is disabled.
    
    returns (in this order):
    
    query_these_indices: list of 23 indices into precip_ens_nonzero
    empirical_CDF: query_these_indices / nz
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma shape and scale (pmean = alpha*beta)
    i1: first index used when evaluating the goodness of fit
    Dnstat: max abs difference of fitted and empirical CDF from i1 onward
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector,
    #      plus extra upper-tail points (0.875, 0.925, 0.975, 0.9875)
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
    if pflag: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is used as the dtype because the np.float alias 
    #      no longer exists in current NumPy.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag: print ('   empirical_CDF = ',empirical_CDF)
    
    # ---- extract the quantiles at these cumulative probabilities
    
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    
    if pflag: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, the goodness of fit is evaluated only from index i1 onward, where 
    #      i1 is the smaller of (a) the index of the quantile closest to 1.0 mm and 
    #      (b) index 17 of the query list.
    
    if pflag: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag: print ('   fitted_CDF = ', fitted_CDF)
    if pflag: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))]) 
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    if pflag: print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    
    # ---- single-component values handed back as saved weight / alpha / beta
    
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero):
    
    """
    
    Call gammamix_em (imported from the gammamix module) to estimate the 
    weights and parameters of a mixture of two Gamma distributions.  Store 
    the result at grid point [jy, ix], evaluate how well it fits the 
    empirical data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : empirical quantiles at which the fit is evaluated
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas
    beta: input and output array of Gamma fitted betas (filled from the 
        fit's invbeta and used as scale: the CDF is evaluated at precip / beta)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
        (the three *_save_2m inputs are only printed; the fit is started 
        without a first guess.  On success they are replaced in the return 
        value by the new estimates.)
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (currently unused)
    
    returns:
    two_parameter_fail: True only if the fit/evaluation raised
    weights, alpha, beta: the input arrays, modified in place at [:, jy, ix]
    Dnstat2: max abs difference between fitted and empirical CDF from index
        i1 onward; set to the placeholder 0.10 when the fit failed.
    weight_save_2m, alpha_save_2m, beta_save_2m: new estimates on success, 
        otherwise the inputs unchanged
    nmixture: input array with [jy, ix] set to 2 on a successful fit
    """

    # Start from "no failure", as the 3- and 4-mixture routines do; the flag 
    # is raised only in the except branch.  (It used to start as True and 
    # was never cleared, so a successful fit was still reported as failed.)
    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the routine that estimates the weights and a mixture 
        #     of two Gamma distributions. 
        
        if pflag: print ('calling gammamix_em 2 parameter')
        if pflag: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('weight_save_2m = ', weight_save_2m)
        if pflag: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag: print ('beta_save_2m = ', beta_save_2m)
        best_result = gammamix_em(precip_ens_nonzero, \
            mix_prop=None, alpha=None, invbeta=None,\
            k=2, epsilon=0.002, maxit=100, maxrestarts=20, verb=False)
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        
        # --- component 2 is unused by a two-mixture fit: zero weight and 
        #     benign (nonzero) parameters.
        
        weights[2:3,jy,ix] = 0.0
        alpha[2:3,jy,ix] = 1.0
        beta[2:3,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception:
        # Exception (not a bare except) so that Ctrl-C / SystemExit still 
        # stop a long grid loop instead of being reported as a failed fit.
        print ('two_parameter_fail')
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
    beta_save_3m, nmixture, fraction_zero):    
    
    """
    
    Call gammamix_em (imported from the gammamix module) to estimate the 
    weights and parameters of a mixture of three Gamma distributions.  Store 
    the result at grid point [jy, ix], evaluate how well it fits the 
    empirical data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : empirical quantiles at which the fit is evaluated
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas; leading dimension
        of at least 3 (any components from index 3 on are reset as unused)
    beta: input and output array of Gamma fitted betas (filled from the 
        fit's invbeta and used as scale: the CDF is evaluated at precip / beta)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: saved weights from previous call
    alpha_save_3m: saved alpha from previous call
    beta_save_3m: saved beta from previous call
        (the three *_save_3m inputs are only printed; the fit is started 
        without a first guess.  On success they are replaced in the return 
        value by the new estimates.)
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (currently unused)
    
    returns:
    three_parameter_fail: True if anything in the fit/evaluation raised
    weights, alpha, beta: the input arrays, modified in place at [:, jy, ix]
    Dnstat3: max abs difference between fitted and empirical CDF from index
        i1 onward; set to the placeholder 0.10 when the fit failed.
    weight_save_3m, alpha_save_3m, beta_save_3m: new estimates on success, 
        otherwise the inputs unchanged
    nmixture: input array with [jy, ix] set to 3 on a successful fit
    
    """

    three_parameter_fail = False # in case routine bombs off
    try:    
    
        if pflag: print ('   Need to try 3 gamma mixture.')
        
        # --- call the routine that estimates the weights and a mixture 
        #     of 3 Gamma distributions. 
        
        if pflag: print ('calling gammamix_em 3 parameter')
        if pflag: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('weight_save_3m = ', weight_save_3m)
        if pflag: print ('alpha_save_3m = ', alpha_save_3m)
        if pflag: print ('beta_save_3m = ', beta_save_3m)
            
        best_result = gammamix_em(precip_ens_nonzero, \
            mix_prop=None, alpha=None, invbeta=None,\
            k=3, epsilon=0.002, maxit=100, maxrestarts=20, verb=False)
         
        params = best_result.params
        alpha[0:3,jy,ix] = np.array(params.alpha[:])
        beta[0:3,jy,ix] = np.array(params.invbeta[:])
        weights[0:3,jy,ix] = np.array(params.mix_prop[:])
        
        # --- any component beyond the third is unused by this fit: zero 
        #     weight and benign (nonzero) parameters.  Slices are used so 
        #     this is a no-op, not an IndexError, when the arrays only hold 
        #     three components.
        
        alpha[3:,jy,ix] = 1.0
        beta[3:,jy,ix] = 1.0
        weights[3:,jy,ix] = 0.0
                
        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception:
        # Exception (not a bare except) so that Ctrl-C / SystemExit still 
        # stop a long grid loop instead of being reported as a failed fit.
        print ('three_parameter_fail')
        three_parameter_fail = True
        Dnstat3 = 0.10    
    
    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
        
        
# =====================================================================

def four_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_4m, alpha_save_4m, \
    beta_save_4m, nmixture, fraction_zero):    
    
    """
    
    Call gammamix_em to estimate the weights and parameters of a mixture of
    four Gamma distributions.  Evaluate how well it fits the empirical
    data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities of those quantiles
    weights: input and output array of Gamma mixture weights; the four
        entries at [:,jy,ix] are overwritten in place
    alpha: input and output array of Gamma fitted alphas (same layout)
    beta: input and output array of Gamma fitted betas (same layout)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_4m, alpha_save_4m, beta_save_4m: saved values from a 
        previous call.  They are only printed here; the fit itself is 
        started without a first guess (mix_prop=None, alpha=None, 
        invbeta=None).
    nmixture: array of the number of mixture components per grid point;
        set to 4 at [jy,ix] when the fit succeeds
    fraction_zero: fraction of points with zero precip (not used; it only
        appears in the disabled Dnstat scaling below)
    
    returns:
    four_parameter_fail: True if anything in the fit/evaluation raised
    weights, alpha, beta: the (in-place updated) parameter arrays
    Dnstat4: max |fitted CDF - empirical CDF| over quantiles i1 and above,
        or 0.10 if the fit failed
    weight_save_4m, alpha_save_4m, beta_save_4m: the newly fitted weights, 
        alphas, betas (the input values are returned if the fit failed 
        before they were updated)
    nmixture: the (in-place updated) component-count array
    
    """

    four_parameter_fail = False # set True below if the routine bombs off
    try:    
    
        if pflag: print ('   Need to try 4 gamma mixture.')
        
        # --- call the routine that estimates the weights and parameters  
        #     of a mixture of 4 Gamma distributions.
        
        if pflag: print ('calling gammamix_em 4 parameter')
        if pflag: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('weight_save = ', weight_save_4m)
        if pflag: print ('alpha_save = ', alpha_save_4m)
        if pflag: print ('beta_save = ', beta_save_4m)
        
        best_result = gammamix_em(precip_ens_nonzero, \
            mix_prop=None, alpha=None, invbeta=None,\
            k=4, epsilon=0.001, maxit=100, maxrestarts=20, verb=False)
        
        # --- store the fitted parameters for this grid point, and keep 
        #     copies to hand back to the caller.
        
        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])
        
        weight_save_4m = np.array(params.mix_prop[:])
        alpha_save_4m = np.array(params.alpha[:])
        beta_save_4m = np.array(params.invbeta[:])

        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 4 distributions.
        #       Each component is evaluated at the precip amounts scaled by
        #       its OWN beta (the 4th component previously reused the 3rd
        #       component's scaled values).
        
        fitted_CDF = sum( weights[k,jy,ix] * stats.gamma.cdf( \
            empirical_precipvals / beta[k,jy,ix], alpha[k,jy,ix]) \
            for k in range(4) )
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for the quantiles at index i1
        #      and above.
        
        #Dnstat4 = (1.0-fraction_zero)*np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        Dnstat4 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 4 gamma mixture = ', Dnstat4)
        nmixture[jy,ix] = 4
    except Exception as err:
        # best-effort: report the failure (with its cause) and let the 
        # caller fall back on another mixture.
        print ('four_parameter_fail', repr(err))
        four_parameter_fail = True
        Dnstat4 = 0.10    
    
    return four_parameter_fail, weights, alpha, beta, Dnstat4, \
        weight_save_4m, alpha_save_4m, beta_save_4m, nmixture    
        
# =====================================================================        
        
def decide_which_mixture(jy,ix, weights, alpha, beta, pflag, \
    Dnstat1, Dnstat2, Dnstat3, Dnstat4, alpha_save_1m, beta_save_1m,\
    weight_save_1m, alpha_save_2m, beta_save_2m,\
    weight_save_2m, alpha_save_3m, beta_save_3m,\
    weight_save_3m, alpha_save_4m, beta_save_4m,\
    weight_save_4m, nmixture):
    
    """ based on the Dn statistics for 1, 2, 3, 4 Gamma mixtures, 
        decide which mixture to use (the one with lowest Dn; the smaller
        mixture wins a tie) and store its parameters at grid point [jy,ix].
        
        inputs:
        jy, ix: grid indices
        weights, alpha, beta: parameter arrays, 4 components on the leading
            axis; entries at [:,jy,ix] are overwritten in place
        pflag: true if printing desired
        Dnstat1..Dnstat4: fit statistics of the 1- to 4-component mixtures
        alpha_save_Nm, beta_save_Nm, weight_save_Nm: saved parameters of 
            the N-component fit.  weight_save_1m is accepted for symmetry
            but the single-Gamma weight is always stored as 1.0.
        nmixture: array of the number of components used per grid point
        
        The selected mixture fills the first N components; any remaining
        components get weight 0.0 and alpha = beta = 1.0, and 
        nmixture[jy,ix] is set to N.
        
        returns: weights, alpha, beta, nmixture (the updated arrays)
    """
    
    Dnstats = np.array([Dnstat1, Dnstat2, Dnstat3, Dnstat4])
    #if pflag == True: print ('Dnstats 1,2,3,4 = ', Dnstats)
    print ('Dnstats 1,2,3,4 = ', Dnstats)  # printed regardless of pflag
    ncomponents = int(np.argmin(Dnstats)) + 1
    
    # ---- saved (weights, alpha, beta) for each candidate mixture size
    
    saved = { \
        1: (1.0, alpha_save_1m, beta_save_1m), \
        2: (weight_save_2m, alpha_save_2m, beta_save_2m), \
        3: (weight_save_3m, alpha_save_3m, beta_save_3m), \
        4: (weight_save_4m, alpha_save_4m, beta_save_4m) }
    weight_sel, alpha_sel, beta_sel = saved[ncomponents]
    
    if pflag: print ('selected ' + str(ncomponents) + '-Gamma mixture')
    weights[0:ncomponents,jy,ix] = weight_sel
    alpha[0:ncomponents,jy,ix] = alpha_sel
    beta[0:ncomponents,jy,ix] = beta_sel
    
    # ---- neutralize the unused components (no-op for the 4-Gamma mixture)
    
    weights[ncomponents:,jy,ix] = 0.0
    alpha[ncomponents:,jy,ix] = 1.0
    beta[ncomponents:,jy,ix] = 1.0
    
    # ---- record the number of components actually selected (the 4-Gamma 
    #      case was previously recorded as 1)
    
    nmixture[jy,ix] = ncomponents
                                         
    return weights, alpha, beta, nmixture  
    
# =====================================================================

def relax_toward_equal_weight(weight_save_2m, \
    weight_save_3m, weight_save_4m):
    
    """
    Nudge the saved 2-, 3- and 4-component mixture weights halfway toward
    equal weighting: each vector is averaged with the equal-weight vector
    of the same length.  Returns the three relaxed vectors in the same 
    order as the arguments.
    """
    
    equal_weights = ( \
        np.array([0.5, 0.5]), \
        np.array([0.33333, 0.33333, 0.33333]), \
        np.array([0.25, 0.25, 0.25, 0.25]) )
    saved_weights = (weight_save_2m, weight_save_3m, weight_save_4m)
    
    relaxed = [ (saved + equal) / 2. \
        for saved, equal in zip(saved_weights, equal_weights) ]
    
    return relaxed[0], relaxed[1], relaxed[2]

# =====================================================================
# =====================================================================

    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag: print ('before read ', current_time)    
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag: print (ncfile)

# the with-block closes the file even if one of the reads raises
with Dataset(ncfile) as nc:
    precip_ens = nc.variables['apcp_fcst'][:,:,:]
    nsamps, nyin, nxin = np.shape(precip_ens)
    lons_1d = nc.variables['lons_fcst'][:]
    lats_1d = nc.variables['lats_fcst'][:]

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag: print ('after read ', current_time) 

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float / int are used 
#      as dtypes because the np.float / np.int aliases no longer exist
#      in current NumPy releases.

weights = np.zeros((4,nyin, nxin), dtype=float)
alpha = np.zeros((4,nyin,nxin), dtype=float)
beta = np.zeros((4,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)

# ---- Dn statistics are initialized to -99.99, which marks grid points
#      where the corresponding mixture was never fitted.

Dnstat1a = -99.99*np.ones((nyin, nxin), dtype=float)
Dnstat2a = -99.99*np.ones((nyin, nxin), dtype=float)
Dnstat3a = -99.99*np.ones((nyin, nxin), dtype=float)
Dnstat4a = -99.99*np.ones((nyin, nxin), dtype=float)

# ---- small amount subtracted from every sample before fitting
teeny_precip = 0.06*np.ones(nsamps) 

# ---- defaults for the single-Gamma saved parameters

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three, then of four; if none 
#      of them fits well, keep the mixture with the lowest Dn statistic.

# ---- first-guess mixture parameters handed to the mixture-fitting
#      routines at the first grid point (equal weights, random alpha
#      and beta).  These are set once here, ahead of the loops, rather 
#      than under an "if jy == 290" test inside the jy loop, so that they
#      are defined whatever range of jy is processed.

weight_save_4m = 0.25*np.ones((4), dtype=float) # placeholder values
alpha_save_4m = np.random.uniform(low=0.5, high=1.5, size=4) 
beta_save_4m = np.random.uniform(low=0.5, high=1.5, size=4) 
weight_save_3m = 0.33333*np.ones((3), dtype=float) # placeholder values
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)

# ---- Dn above this value is treated as an excessive misfit, and the
#      next larger mixture is tried.  excessive_threshold inspired 
#      by Wilks textbook, Table 5.2, assume for simplicity alpha=1
#      since there is only a weak dependence on alpha.

excessive_threshold = 0.02
        
tktr = 0

# NOTE: the loops below cover only a small test subdomain; the 
#       commented-out ranges are the every-5th-point full-domain loops.

#for jy in range(0,nyin,5):
for jy in range(290,291,5):
    
    #for ix in range(0,nxin,5):
    for ix in range(170,231,5):
        
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('*** time, jy, ix, lon, lat = ',\
            current_time, jy, ix, tktr, nyin*nxin, lons_1d[ix], lats_1d[jy])
        if pflag: print ('   Before fraczero_possamps, jy,ix, \
            nsamps, time = ', jy,ix, nxin*nyin, current_time)
            
        # ---- there is a grib compaction error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        
        precip_ens_1d = precip_ens[:,jy,ix] - teeny_precip[:]
        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        fzero[jy,ix] = fraction_zero 
        if pflag: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag: print ('   number of samples with positive precip = ', nz)
        #print ('   precip_ens_nonzero[-10:] = ', precip_ens_nonzero[-20:])
        if nz > 30 and precip_ens_nonzero[-1] > 4.0:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95)
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  
            
            if pflag: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero)
                    
                Dnstat2a[jy,ix] = Dnstat2
                    
                if two_parameter_fail or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail or Dnstat3 > excessive_threshold: 
                        
                        # ---- try 4-parameter Gamma
                        
                        four_parameter_fail, weights, alpha, beta, Dnstat4,\
                            weight_save_4m, alpha_save_4m, beta_save_4m, nmixture = \
                            four_parameter_gamma(jy, ix, precip_ens_nonzero, \
                            empirical_precipvals, empirical_CDF, weights, \
                            alpha, beta, i1, pflag, weight_save_4m, alpha_save_4m, \
                            beta_save_4m, nmixture, fraction_zero)
                        Dnstat4a[jy,ix] = Dnstat4
                            
                        if four_parameter_fail or Dnstat4 > excessive_threshold: 
                            
                            # ---- nothing fit well; fall back on whichever 
                            #      mixture had the lowest Dn
                            
                            weights, alpha, beta, nmixture = decide_which_mixture(jy,ix,
                                weights, alpha, beta, pflag, Dnstat1, \
                                Dnstat2, Dnstat3, Dnstat4, alpha_save_1m, beta_save_1m,\
                                weight_save_1m, alpha_save_2m, beta_save_2m,\
                                weight_save_2m, alpha_save_3m, beta_save_3m,\
                                weight_save_3m, alpha_save_4m, beta_save_4m,\
                                weight_save_4m, nmixture)
                
                # ---- these weights are saved for the next grid point, but sometimes
                #      the weights are far from equal and not appropriate if the 
                #      next grid point has a much smaller sample.   So nudge toward
                #      equal weighting.
                
                weight_save_2m, weight_save_3m, weight_save_4m = \
                    relax_toward_equal_weight(weight_save_2m, \
                    weight_save_3m, weight_save_4m)
                    
            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit a simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            # (nz is printed here; the sample mean has not been computed 
            #  yet in this branch)
            if pflag: print ('   very light precipitation at this grid point. nz = ', nz)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # mean of the logs (not their sum), as in one_parameter_gamma
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                #if pflag == True: print('   lnxbar, meanlnxi = ',lnxbar, meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1

# --- save to cPickle file.  The arrays are dumped one after another into
#     the same file, so a reader must load them back in this same order:
#     weights, alpha, beta, fzero, Dnstat1a, Dnstat2a, Dnstat3a, Dnstat4a,
#     nmixture.

outfile = master_directory + cmonth+ '_apcp_gamma_parameters_v3_h' + clead + '.cPick'
print ('writing to ', outfile)

# the with-block closes the file even if one of the dumps raises
with open(outfile, 'wb') as ouf:
    for output_array in (weights, alpha, beta, fzero, \
        Dnstat1a, Dnstat2a, Dnstat3a, Dnstat4a, nmixture):
        cPickle.dump(output_array, ouf)



                                                                                                                                                                                                                                                                                                                                                                    ._CDF_fitting_forecast_precip_v4.py                                                                 000775  000765  000024  00000000474 14016263124 017541  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  
     <                                      ATTR      <     $                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      7     com.macromates.visibleIndex  `    J     ( '337' )530:1323656                                                                                                                                                                                                    CDF_fitting_forecast_precip_v4.py                                                                   000775  000765  000024  00000071450 14016263124 017326  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip_v4.py cmonth clead 

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def find_nearest(vec, value):
    """ return the index of the element of vec that is closest to value """
    distance = np.abs(vec-value)
    return distance.argmin()

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    """
    Split the input sample vector precip_ens into "zero" and positive
    precipitation.  Samples at or below 0.006 mm are censored (counted as 
    zero).  Each remaining sample gets a small uniform random perturbation
    (+/- 0.005 mm) because the data were discretized, so that CDFs built 
    later do not contain many tied amounts.  
    
    nsamps is accepted but not used; the sample count is taken from
    precip_ens itself.
    
    returns:
    fraction_zero: fraction of the samples that were censored
    precip_ens_nonzero: perturbed positive samples, sorted ascending
    nz: number of positive samples
    """
    
    # ---- censor at 0.006 mm
    
    censored = np.where(precip_ens <= 0.006)
    positive = np.delete(precip_ens, censored)  
    nz = len(positive)
    
    # ---- data discretized, so add random component of this magnitude,
    #      then sort.
    
    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz)
    precip_ens_nonzero = np.sort(positive + jitter)
    
    # ---- everything that was censored counts as zero precipitation
    
    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - nz) / float(ntotal)
    
    return fraction_zero, precip_ens_nonzero, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Along with some other ancillary stuff, return the fitted Gamma distribution
    alpha and beta values, along with an evaluation (Dnstat) of how 
    closely the fitted CDF matches the empirical CDF 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector (numpy array) of nonzero 
        precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples with zero precip (not used; it only
        appears in the disabled Dnstat scaling below)
    
    returns:
    
    query_these_indices: list of 23 indices into precip_ens_nonzero
    empirical_CDF: query_these_indices / nz, the cumulative probabilities
    empirical_precipvals: precip amounts at those indices
    pmean: mean of the nonzero precip amounts
    alpha_hat, beta_hat: fitted Gamma shape and scale
    i1: first quantile index included in the goodness-of-fit evaluation
    Dnstat: max |fitted CDF - empirical CDF| over quantiles i1 and above
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, + a few 
    #      others at high quantiles.   The same 23 quantiles are used whatever
    #      the sample size.
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    
    # ---- i1 is the quantile whose precip amount is closest to 1.0 mm, 
    #      capped at list position 17 (the 35/40 quantile).
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
        
    if pflag: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is used as dtype because the np.float alias 
    #      no longer exists in current NumPy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag: print ('   empirical_CDF = ',empirical_CDF)
    if pflag: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, we will evaluate the goodness of fit only from the minimum of: (a) 
    #      where there is precip of about 1.0 mm, or (b) the 35/40th quantile of the  
    #      empirical distribution, whichever is smaller (the i1 index).   
    
    if pflag: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag: print ('   fitted_CDF = ', fitted_CDF)
    if pflag: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    #Dnstat = (1.0 - fraction_zero)*np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    if pflag: print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride):
    
    """
    
    Call gammamix_em to estimate the weights and parameters of
    a mixture of two Gamma distributions.  Evaluate how well it fits the empirical
    data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities of those quantiles
    weights: input and output array of Gamma mixture weights; entries
        0, 1 and 2 at [jy,ix] are overwritten in place
    alpha: input and output array of Gamma fitted alphas (same layout)
    beta: input and output array of Gamma fitted betas (same layout)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used; it only
        appears in the disabled Dnstat scaling below)
    nstride: how many grid points to step in each direction
    
    returns:
    two_parameter_fail: True only if the fit/evaluation raised
    weights, alpha, beta: the (in-place updated) parameter arrays
    Dnstat2: max |fitted CDF - empirical CDF| over quantiles i1 and above,
        or 0.10 if the fit failed
    weight_save_2m, alpha_save_2m, beta_save_2m: the newly fitted values
        (the input values are returned if the fit failed before they 
        were updated)
    nmixture: the (in-place updated) component-count array
    """

    # ---- report success unless something below raises.  (This flag used
    #      to start out True and was never cleared, so every call reported
    #      failure; the three-parameter routine starts it out False.)
    
    two_parameter_fail = False 
    try:
        
        # --- call the routine that estimates the weights and parameters 
        #     of a mixture of two Gamma distributions. 
        
        if pflag: print ('calling gammamix_em 2 parameter')
        if pflag: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('weight_save_2m = ', weight_save_2m)
        if pflag: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag: print ('beta_save_2m = ', beta_save_2m)
            
        # --- use the saved parameters as a first guess only when there is
        #     a previous grid point in this row (ix > 0) and that point is
        #     flagged with nmixture == 3; otherwise start without a first guess.
        #     NOTE(review): the test against 3 in this 2-component routine 
        #     may have been meant to be 2 -- confirm.
        
        warm_start = ix != 0 and nmixture[jy,ix-nstride] == 3
        if warm_start:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_2m, alpha=alpha_save_2m, invbeta=beta_save_2m,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: 
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        # --- store the fit in components 0 and 1; neutralize component 2.
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for the quantiles at index i1
        #      and above.
        
        #Dnstat2 = (1.0-fraction_zeros)*np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag: print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err:
        # best-effort: report the failure (with its cause) and let the 
        # caller move on to another mixture.
        print ('two_parameter_fail', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride):

    """
    Fit a mixture of three Gamma distributions at grid point (jy, ix) by
    calling gammamix_em, then evaluate how closely the fitted CDF matches
    the empirical CDF.

    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the sampled empirical quantiles
    empirical_CDF: cumulative probabilities of those quantiles
    weights: input and output array of Gamma mixture weights [3, ny, nx]
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas [3, ny, nx]
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: indicates how many parameters are used in the mix [ny, nx]
    fraction_zero: fraction of points with zero precip (not used in the
        calculation; kept so existing callers need not change)
    nstride: how many grid points to step in each direction

    returns:
    three_parameter_fail: True if an exception was raised during the fit
    weights, alpha, beta: the input arrays, overwritten at [:,jy,ix] once
        gammamix_em has returned
    Dnstat3: max |fitted CDF - empirical CDF| over quantile indices >= i1;
        set to 0.10 when the fit fails
    weight_save_3m, alpha_save_3m, beta_save_3m: the newly fitted values
        (the inputs are returned unchanged if gammamix_em itself fails)
    nmixture: input array with [jy,ix] set to 3 when the fit succeeds
    """

    three_parameter_fail = False # in case routine bombs off
    try:

        if pflag == True: print ('   Need to try 3 gamma mixture.')
        if pflag == True: print ('calling gammamix_em 3 parameter')
        if pflag == True: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('weight_save_3m = ', weight_save_3m)
        if pflag == True: print ('alpha_save_3m = ', alpha_save_3m)
        if pflag == True: print ('beta_save_3m = ', beta_save_3m)

        # --- warm-start from the previous fit only when the grid point one
        #     stride to the left exists and was itself a 3-mixture fit.  The
        #     explicit bounds check stops a negative ix-nstride from silently
        #     wrapping around to the far end of the row.

        warm_start = ix != 0 and ix - nstride >= 0 and \
            nmixture[jy,ix-nstride] == 3
        if warm_start:
            guess_weights = weight_save_3m
            guess_alpha = alpha_save_3m
            guess_beta = beta_save_3m
        else:
            guess_weights = guess_alpha = guess_beta = None

        best_result = gammamix_em(precip_ens_nonzero, \
            mix_prop=guess_weights, alpha=guess_alpha, invbeta=guess_beta,\
            k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)

        # baseline .002, 100

        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])

        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])

        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.

        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)

        # ---- calculate a statistic for how far off the fitted CDF is from the
        #      empirical CDF, using only the quantiles from index i1 upward.

        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        if pflag == True: print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err:
        # Exception (not a bare except) so that Ctrl-C and sys.exit() still
        # stop the surrounding grid loop instead of being counted as a
        # failed fit.
        print ('three_parameter_fail')
        if pflag == True: print ('   reason: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10

    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ Compare the Dn statistics of the 1-, 2- and 3-Gamma fits and store
        the parameters of the fit with the lowest Dn at grid point (jy, ix).
        Unused mixture slots get weight 0.0 and alpha = beta = 1.0.
        Returns the updated weights, alpha, beta and nmixture arrays.
    """

    verbose = (pflag == True)
    fit_scores = np.array([Dnstat1, Dnstat2, Dnstat3])
    if verbose: print ('Dnstats 1,2,3  = ', fit_scores)
    best = np.argmin(fit_scores)  # first index wins on ties

    if best == 2:
        if verbose: print ('selected 3-Gamma mixture')
        weights[:,jy,ix], alpha[:,jy,ix], beta[:,jy,ix] = \
            weight_save_3m[:], alpha_save_3m[:], beta_save_3m[:]
        nmixture[jy,ix] = 3
    elif best == 1:
        if verbose: print ('selected 2-Gamma mixture')
        weights[:2,jy,ix], alpha[:2,jy,ix], beta[:2,jy,ix] = \
            weight_save_2m[:], alpha_save_2m[:], beta_save_2m[:]
        # third component is a placeholder
        weights[2,jy,ix], alpha[2,jy,ix], beta[2,jy,ix] = 0.0, 1.0, 1.0
        nmixture[jy,ix] = 2
    elif best == 0:
        if verbose: print ('selected 1-Gamma mixture')
        weights[0,jy,ix], alpha[0,jy,ix], beta[0,jy,ix] = \
            1.0, alpha_save_1m, beta_save_1m
        # second and third components are placeholders
        weights[1:,jy,ix], alpha[1:,jy,ix], beta[1:,jy,ix] = 0.0, 1.0, 1.0
        nmixture[jy,ix] = 1

    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag == True: print ('before read ', current_time)    
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag == True: print (ncfile)
nc = Dataset(ncfile)
precip_ens = nc.variables['apcp_fcst'][:,:,:]
nsamps, nyin, nxin = np.shape(precip_ens)
lons_1d = nc.variables['lons_fcst'][:]
lats_1d = nc.variables['lats_fcst'][:]
nc.close()
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
if pflag == True: print ('after read ', current_time) 

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float / int are used
#      as dtypes because the np.float / np.int aliases were removed
#      from NumPy (1.24) and raise AttributeError there.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)
# 0.10 is also the value the fitting routines return when a fit fails
Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)
 

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
        
tktr = 0

# ---- placeholder first guesses for the 2- and 3-component mixtures;
#      they are replaced by fitted values after each successful fit.

weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # arbitrary starting values
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
        
# ---- visit every nstride-th grid point in each direction and fit the
#      simplest Gamma model (1, 2 or 3 components) whose Dn statistic is
#      acceptable.  For single-point debugging, narrow the ranges, e.g.
#      range(410,411,5) for jy and range(600,601,5) for ix.

nstride = 5
for jy in range(0,nyin,nstride):
        
    for ix in range(0,nxin,nstride):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('************ time, jy, ix, lon, lat = ',\
            current_time, jy, ix, tktr, nyin*nxin, lons_1d[ix], lats_1d[jy])
        if pflag == True: print ('   Before fraczero_possamps, jy,ix, nsamps, time = ', \
            jy,ix, nxin*nyin, current_time)
            
        # ---- there is a grib compaction error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        #
        # NOTE(review): tp below is min(|min(precip)|, 0.0), which is always
        #      0.0, so this subtraction currently changes nothing.  The
        #      intended offset is unclear, so the expression is left as is;
        #      confirm what was meant before changing it.
        
        if pflag == True: print ('   before subtracting teeny_precip, max, min = ', \
            np.max(precip_ens[:,jy,ix]), np.min(precip_ens[:,jy,ix]))
        precip_ens_1d = precip_ens[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag == True: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag == True: print ('   precip_ens_nonzero[-400:-1] = ',\
            precip_ens_nonzero[-400:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag == True: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag == True: print ('   number of samples with positive precip = ', nz)
        if pflag == True: print ('   precip_ens_nonzero[-10:] = ', \
            precip_ens_nonzero[-20:])
        if nz > 40 and precip_ens_nonzero[-1] > 2.0:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras, if there are enough samples.
            #     With smaller number of samples, do the sampling at a fewer
            #     number of quantiles
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  excessive_threshold inspired 
            #      by Wilks textbook, Table 5.2, assume for simplicity alpha=1
            #      since there is only a weak dependence on alpha.
            
            excessive_threshold = 0.02
            if pflag == True: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride)
                Dnstat2a[jy,ix] = Dnstat2
                    
                if two_parameter_fail == True or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail == True or Dnstat3 > excessive_threshold: 
                        
                        # ---- none of the fits met the threshold; fall back
                        #      to whichever of the three had the lowest Dn.
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                        jy, ix, weights, alpha, beta, nmixture, pflag, \
                        Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                        weight_save_1m, alpha_save_2m, beta_save_2m, \
                        weight_save_2m, alpha_save_3m, beta_save_3m, \
                        weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit an simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag == True: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # D needs the MEAN of the logs (same form as in
                # one_parameter_gamma); taking the sum made D strongly
                # sample-size dependent and could drive the sqrt below to NaN.
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1

# --- save to cPickle file.  The arrays are dumped one after another, so a
#     reader must load them back in exactly this order: weights, alpha,
#     beta, fzero, Dnstat1a, Dnstat2a, Dnstat3a, nmixture.

outfile = master_directory + cmonth+ '_apcp_gamma_parameters_v3_eps003_maxit60_h' + clead + '.cPick'
print ('writing to ', outfile)
# the with-block closes the file even if one of the dumps raises
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)
    cPickle.dump(Dnstat1a, ouf)
    cPickle.dump(Dnstat2a, ouf)
    cPickle.dump(Dnstat3a, ouf)
    cPickle.dump(nmixture, ouf)



                                                                                                                                                                                                                        ._CDF_fitting_forecast_precip_v5.py                                                                 000775  000765  000024  00000000470 14016263124 017536  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       8                                      ATTR      8                            com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    	     ( '359' )6404506                                                                                                                                                                                                        CDF_fitting_forecast_precip_v5.py                                                                   000775  000765  000024  00000072315 14016263124 017330  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip_v5.py cmonth clead cdomain

this python script is designed to fit a mixture of Gamma CDFs to closely
match an empirical distribution of precipitation.   Up to 3 Gamma distributions
can be mixed, though 1 or 2 is preferable if they provide a sufficiently
close fit.   The script is tailored to GEFSv12 forecasts over one of the
National Digital Forecast Database domains for the National Blend of
Models

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def set_domain_boundaries(cdomain):
    
    """ Return (jmin, jmax, imin, imax), the bounding indices on the 0.25
        GEFSv12 reforecast grid for the domain named by cdomain ('conus',
        'pr' or 'ak').  Per the original author, the values were determined
        from a grib file of the 2.5-km blend output grid.  Any other name
        prints a message and exits via sys.exit().
    """
    # each entry: domain name, (jmin, jmax, imin, imax)
    known_domains = (
        ('conus', (93, 246, 368, 686)),
        ('pr', (243, 256, 649, 667)),
        ('ak', (19, 161, 201, 967)),
    )
    for domain_name, bounds in known_domains:
        if cdomain == domain_name:
            return bounds

    print ('invalid domain.  Exiting.')     
    sys.exit()    
 
# =====================================================================

def find_nearest(vec, value):
    
    """ return the index of the element of vec whose absolute difference
    from value is smallest (the first such index if there is a tie)"""
    
    offsets = vec - value
    return np.abs(offsets).argmin()

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    
    """
    
    Split the sample vector precip_ens into "zero" and positive parts.
    Samples at or below 0.006 mm are treated as zero.  Each remaining
    sample gets a uniform random perturbation in [-0.005, 0.005) added, 
    because the data was discretized to ~0.1 mm and later CDF building
    should not see lots of tied amounts.  The perturbed positive samples
    are sorted before being returned.
    
    nsamps is accepted but not used; counts come from precip_ens itself.
    
    returns: fraction_zero (zero samples / all samples), the sorted 
    perturbed positive samples, and nz (number of positive samples).
    
    """
    
    dry = np.where(precip_ens <= 0.006)  # censor at 0.006 mm
    wet_samples = np.delete(precip_ens, dry)
    nz = len(wet_samples)
    # data discretized, so add random component of this magnitude
    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz)
    wet_samples = np.sort(wet_samples + jitter)
    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - len(wet_samples)) / float(ntotal)
    return fraction_zero, wet_samples, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Fit a single Gamma distribution to the nonzero precipitation samples
    and evaluate (Dnstat) how closely the fitted CDF matches the empirical
    CDF.   Dnstat follows the Dn statistics discussed in Wilks (2011) 
    Statistical Methods in the Atmospheric Sciences text for distribution 
    fitting. 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples that are zero (not used in the 
        calculation)
    
    returns:
    
    query_these_indices: list of 23 indices into precip_ens_nonzero at 
        which the empirical distribution is sampled
    empirical_CDF: query_these_indices / nz
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma shape and scale
    i1: first quantile index used when computing Dnstat
    Dnstat: max |fitted CDF - empirical CDF| over indices >= i1
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, + a few 
    #      others at high quantiles.
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    if pflag == True: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is used as dtype: the np.float alias was removed
    #      from NumPy (1.24) and raises AttributeError there.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag == True: print ('   empirical_CDF = ',empirical_CDF)
    if pflag == True: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi   # log of the mean minus mean of the logs
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, the fit is only evaluated from index i1 upward (defined below).
    
    if pflag == True: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag == True: print ('   fitted_CDF = ', fitted_CDF)
    if pflag == True: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    # --- i1 is the smaller of: the index of the sampled quantile closest to
    #     1.0 mm, or 17 (the position of the 35/40th quantile in 
    #     query_these_indices).
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride, \
    Dnstat2, excessive_threshold):
    
    """
    
    Fit a mixture of two Gamma distributions at grid point (jy, ix) by 
    calling gammamix_em, then evaluate how closely the fitted CDF matches
    the empirical CDF.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the sampled empirical quantiles
    empirical_CDF: cumulative probabilities of those quantiles
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas
    beta: input and output array of Gamma fitted betas
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used in the
        calculation)
    nstride: how many grid points to step in each direction (not used in 
        the calculation)
    Dnstat2: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    two_parameter_fail: True if an exception was raised during the fit
    weights, alpha, beta: the input arrays; once gammamix_em has returned,
        rows 0-1 at [jy,ix] hold the fit and row 2 is set to weight 0.0, 
        alpha = beta = 1.0
    Dnstat2: max |fitted CDF - empirical CDF| over quantile indices >= i1;
        set to 0.10 when the fit fails
    weight_save_2m, alpha_save_2m, beta_save_2m: the newly fitted values
        (the inputs are returned unchanged if gammamix_em itself fails)
    nmixture: input array with [jy,ix] set to 2 once gammamix_em has returned
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the R-inspired routine that estimates the weights and a mixture 
        #     of two Gamma distributions. 
        
        if pflag == True: print ('calling gammamix_em 2 parameter')
        if pflag == True: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('weight_save_2m = ', weight_save_2m)
        if pflag == True: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag == True: print ('beta_save_2m = ', beta_save_2m)
        
        # --- reuse the previous fit as a first guess only when this is not
        #     the first column and the previous Dn was reasonably good;
        #     otherwise it is better to restart with new values.
            
        warm_start = ix != 0 and Dnstat2 < excessive_threshold*1.5
        if warm_start:
            guess_weights = weight_save_2m
            guess_alpha = alpha_save_2m
            guess_beta = beta_save_2m
        else:
            guess_weights = guess_alpha = guess_beta = None
            
        best_result = gammamix_em(precip_ens_nonzero, \
            mix_prop=guess_weights, alpha=guess_alpha, invbeta=guess_beta,\
            k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF, using only the quantiles from index i1 upward.
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err: # something went wrong ...
        # Exception (not a bare except) so that Ctrl-C and sys.exit() still
        # stop the surrounding grid loop instead of being counted as a
        # failed fit.
        print ('   two_parameter_fail')
        if pflag == True: print ('   reason: ', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride, mop_up,\
    Dnstat3, excessive_threshold):    
    
    """
    
    Call the gammamix_em routine to estimate the weights and parameters of
    a mixture of three Gamma distributions.  Evaluate how well it fits the 
    empirical data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas (filled from the
        fit's invbeta and used below as the divisor of the precip amounts)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: array recording how many mixture components are used; 
        [jy,ix] is set to 3 when the fit and its evaluation succeed
    fraction_zero: fraction of points with zero precip (unused here)
    nstride: how many grid points to step in each direction (unused here)
    mop_up: flag for whether we're having a 2nd go at things to address points
        where first time through, no good fit.  Forces a fresh start.
    Dnstat3: previous goodness of fit statistic; the saved first guess is 
        only reused when this is below 1.5*excessive_threshold
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    three_parameter_fail, weights, alpha, beta, Dnstat3, weight_save_3m, 
    alpha_save_3m, beta_save_3m, nmixture.   When anything raises an 
    Exception, three_parameter_fail is True and Dnstat3 is set to 0.10.
    KeyboardInterrupt and SystemExit are NOT trapped, so that a long
    grid-point loop can still be interrupted by the user.
    
    """

    three_parameter_fail = False # in case routine bombs off
    try:    
    
        if pflag: print ('   Need to try 3 gamma mixture.')
        
        # --- call the routine that estimates the weights and a mixture 
        #     of 3 Gamma distributions. 
        
        if pflag: print ('   calling gammamix_em 3 parameter')
        if pflag: print ('   precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('   weight_save_3m = ', weight_save_3m)
        if pflag: print ('   alpha_save_3m = ', alpha_save_3m)
        if pflag: print ('   beta_save_3m = ', beta_save_3m)
            
        if ix == 0 or mop_up:
            # --- first column or mop-up pass: no first guess, and a 
            #     tighter tolerance with more iterations.
            print ('   resetting 3-parameter Gamma mixture')
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.001, maxit=100, maxrestarts=20, verb=False)
        #elif nmixture[jy,ix-nstride] == 3 and Dnstat3 < excessive_threshold*1.5:
        elif Dnstat3 < excessive_threshold*1.5:
            # --- previous fit was decent; reuse it as the first guess.
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_3m, alpha=alpha_save_3m, invbeta=beta_save_3m,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: 
            # --- previous fit was poor; better to restart with no first guess.
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)   
         
        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])
                
        # --- keep copies to hand back as the next call's first guess
        
        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only from the i1 index upward.
        
        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err: 
        # --- only ordinary errors are treated as a failed fit; a bare
        #     except here would also swallow Ctrl-C and sys.exit().
        print ('   three_parameter_fail')
        if pflag: print ('   reason: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10    
    
    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ Pick the 1-, 2- or 3-component Gamma mixture whose Dn statistic
        is smallest, and copy its saved parameters into the [jy,ix] 
        column of weights, alpha, beta.   Components beyond the chosen
        number get weight 0.0 and alpha = beta = 1.0.  nmixture[jy,ix]
        records how many components were chosen.   The single-Gamma 
        choice always gets weight 1.0 (weight_save_1m is not read).
    """

    scores = np.array([Dnstat1, Dnstat2, Dnstat3])
    #if pflag == True: print ('Dnstats 1,2,3,4 = ', Dnstats)
    print ('   Dnstats 1,2,3  = ', scores)
    ncomp = int(np.argmin(scores)) + 1   # number of components to use
    
    # ---- gather the parameters of the winning mixture
    
    if ncomp == 1:
        if pflag == True: print ('selected 1-Gamma mixture')
        w_src, a_src, b_src = 1.0, alpha_save_1m, beta_save_1m
    elif ncomp == 2:
        if pflag == True: print ('selected 2-Gamma mixture')
        w_src = weight_save_2m[:]
        a_src = alpha_save_2m[:]
        b_src = beta_save_2m[:]
    else:
        if pflag == True: print ('selected 3-Gamma mixture')
        w_src = weight_save_3m[:]
        a_src = alpha_save_3m[:]
        b_src = beta_save_3m[:]
        
    # ---- active components first, then neutral filler for the rest
    #      (the filler slices are empty when all three are used)
    
    weights[:ncomp,jy,ix] = w_src
    alpha[:ncomp,jy,ix] = a_src
    beta[:ncomp,jy,ix] = b_src
    weights[ncomp:,jy,ix] = 0.0
    alpha[ncomp:,jy,ix] = 1.0
    beta[ncomp:,jy,ix] = 1.0
    nmixture[jy,ix] = ncomp
    
    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.
cdomain = sys.argv[3] # conus, etc.

jmin, jmax, imin, imax = set_domain_boundaries(cdomain)

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- Dn value above which a fit is judged too poor.  excessive_threshold 
#      inspired by Wilks textbook, Table 5.2, assume for simplicity alpha=1
#      since there is only a weak dependence on alpha.   Defined here, 
#      before the loop, because the mop-up test at the bottom of the loop
#      reads it for every grid point, including ones that never enter
#      the heavier-precipitation branch.

excessive_threshold = 0.025

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.
   
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag: print (ncfile)
nc = Dataset(ncfile)
precip_ens = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
nsamps, nyin, nxin = np.shape(precip_ens)
lons_1d = nc.variables['lons_fcst'][imin:imax]
lats_1d = nc.variables['lats_fcst'][jmin:jmax]
nc.close()

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.   Builtin float / int are used for 
#      dtype; the np.float / np.int aliases no longer exist in 
#      current NumPy releases.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)
Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
        
tktr = 0
weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # placeholder first guess
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_3mop = 0.33333*np.ones((3), dtype=float) # placeholder first guess
alpha_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
nstride = 1
Dnstat1 = 0.10
Dnstat2 = 0.10
Dnstat3 = 0.10

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")
for jy in range(0,nyin,nstride):
#for jy in range(60, 61):
        
    for ix in range(0,nxin,nstride):
    #for ix in range (200,251):
        
        mop_up = False
        
        # ---- becomes True only when this grid point has enough 
        #      precipitation for the empirical CDF and the Dn statistics
        #      to have been computed for it; the mop-up step needs those.
        
        mixture_fit_attempted = False
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('***** begin time, current time, jy, ix, nyin, nxin, lon, lat = ',\
            begin_time, current_time, jy, ix, nyin, nxin, lons_1d[ix], lats_1d[jy])
            
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        #      NOTE(review): min(|x|, 0.0) is always 0.0, so as written 
        #      this subtraction changes nothing.  Left as is because the 
        #      intended offset is not clear from this script -- confirm.
        
        precip_ens_1d = precip_ens[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag: print ('   precip_ens_nonzero[-400:-1] = ',\
            precip_ens_nonzero[-400:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag: print ('   number of samples with positive precip = ', nz)
        if pflag: print ('   precip_ens_nonzero[-10:] = ', \
            precip_ens_nonzero[-20:])
        if nz > 40 and precip_ens_nonzero[-1] > 2.0:
        
            mixture_fit_attempted = True
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras, if there are enough samples.
            #     With smaller number of samples, do the sampling at a fewer
            #     number of quantiles
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  
            
            if pflag: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride, \
                    Dnstat2, excessive_threshold)
                Dnstat2a[jy,ix] = Dnstat2
                if pflag:
                    print ('   two_parameter_fail, Dnstat2 = ', two_parameter_fail, Dnstat2)
                    
                if two_parameter_fail or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride, mop_up, \
                        Dnstat3, excessive_threshold)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail or Dnstat3 > excessive_threshold: 
                        
                        # ---- nothing met the threshold; settle for 
                        #      whichever mixture has the lowest Dn.
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                            jy, ix, weights, alpha, beta, nmixture, pflag, \
                            Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                            weight_save_1m, alpha_save_2m, beta_save_2m, \
                            weight_save_2m, alpha_save_3m, beta_save_3m, \
                            weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit an simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # mean (not sum) of the logs, as in one_parameter_gamma
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1

        # ----- sometimes, by using the previous grid point's estimate as a prior mixture, it can
        #       go off into non-sensical parameter space.   Check for this, and if so, initiate 
        #       a final 3-parameter mixture without using prior grid point's saved values.
        #       The Dnstat arrays start at 0.10 (above the threshold), so the flag is 
        #       needed to keep light-precipitation points, whose entries were never 
        #       updated, from being re-fit against another grid point's empirical CDF.

        if mixture_fit_attempted and \
        Dnstat1a[jy,ix] > excessive_threshold and \
        Dnstat2a[jy,ix] > excessive_threshold and \
        Dnstat3a[jy,ix] > excessive_threshold:
        
            mop_up = True
            print ('   mopping up! ')
            three_parameter_fail, weights, alpha, beta, Dnstat3mop,\
                weight_save_3mop, alpha_save_3mop, beta_save_3mop, nmixture = \
                three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                empirical_precipvals, empirical_CDF, weights, \
                alpha, beta, i1, pflag, weight_save_3mop, alpha_save_3mop, \
                beta_save_3mop, nmixture, fraction_zero, nstride, mop_up, \
                Dnstat3, excessive_threshold)

            # ---- keep the mop-up result only if it beat the first 3-mixture try

            if Dnstat3mop < Dnstat3a[jy,ix]:
                Dnstat3 = Dnstat3mop
                weight_save_3m = weight_save_3mop
                alpha_save_3m = alpha_save_3mop
                beta_save_3m = beta_save_3mop
                                
            weights, alpha, beta, nmixture = decide_which_mixture(\
                jy, ix, weights, alpha, beta, nmixture, pflag, \
                Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                weight_save_1m, alpha_save_2m, beta_save_2m, \
                weight_save_2m, alpha_save_3m, beta_save_3m, \
                weight_save_3m)
            
# --- save to cPickle file.   Readers must load the arrays back in 
#     this same order.

outfile = master_directory + cmonth+'_'+cdomain+'_apcp_gamma_parameters_h' + clead + '.cPick'
#outfile = master_directory + cmonth+'_'+cdomain+'_apcp_gamma_parameters_test_h' + clead + '.cPick'

print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)
    cPickle.dump(Dnstat1a, ouf)
    cPickle.dump(Dnstat2a, ouf)
    cPickle.dump(Dnstat3a, ouf)
    cPickle.dump(nmixture, ouf)



                                                                                                                                                                                                                                                                                                                   ._CDF_fitting_forecast_precip_v6.py                                                                 000755  000765  000024  00000000471 14071641717 017547  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       9                                      ATTR      9     !                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    	     ( '359' )64010510                                                                                                                                                                                                       CDF_fitting_forecast_precip_v6.py                                                                   000755  000765  000024  00000072315 14071641717 017340  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_forecast_precip_v6.py cmonth clead cdomain

this python script is designed to fit a mixture of Gamma CDFs to closely
match an empirical distribution of precipitation.   Up to 3 Gamma distributions
can be mixed, though 1 or 2 is preferable if they provide a sufficiently
close fit.   The script is tailored to GEFSv12 forecasts over one of the
National Digital Forecast Database domains for the National Blend of
Models

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def set_domain_boundaries(cdomain):
    
    """ Return (jmin, jmax, imin, imax), the bounding indices into the
        0.25 GEFSv12 reforecast grid for the requested domain name
        ('conus', 'pr' or 'ak').   Values were determined from a grib 
        file of the 2.5-km blend output grid.   Any other name prints 
        a message and exits the program.
    """
    
    # ---- domain name paired with its (jmin, jmax, imin, imax)
    
    known_domains = ( \
        ('conus', (93, 246, 368, 686)), \
        ('pr', (243, 256, 649, 667)), \
        ('ak', (19, 161, 201, 967)) )
        
    for domain_name, bounds in known_domains:
        if cdomain == domain_name:
            return bounds
            
    print ('invalid domain.  Exiting.')     
    sys.exit()    
 
# =====================================================================

def find_nearest(vec, value):
    
    """ return the index of the entry of vector vec whose absolute 
    difference from value is smallest (the first such entry on ties)"""
    
    distance = np.abs(vec - value)
    return distance.argmin()

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    
    """
    
    Split the sample vector precip_ens into "zero" and positive parts.
    Samples at or below 0.006 mm are treated as zero and removed.  A
    uniform random perturbation in [-0.005, 0.005) is added to each 
    remaining sample (the data was discretized to ~0.1 mm, and this 
    avoids lots of tied amounts when CDFs are built later), and the 
    perturbed samples are sorted into ascending order.
    
    Returns the fraction of samples treated as zero, the sorted 
    positive samples, and their count.   nsamps is not used; the 
    sample count is taken from len(precip_ens).
    
    """
    
    censored = np.where(precip_ens <= 0.006)  # censor at 0.006 mm
    wet_samples = np.delete(precip_ens, censored)
    nz = len(wet_samples)
    
    # data discretized, so add random component of this magnitude
    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz) 
    wet_samples = np.sort(wet_samples + jitter)
    
    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - len(wet_samples)) / float(ntotal)
    return fraction_zero, wet_samples, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Fit a single Gamma distribution to the nonzero precipitation samples 
    and return, along with some other ancillary stuff, the fitted alpha 
    and beta values and an evaluation (Dnstat) of how closely the fitted 
    CDF matches the empirical CDF.   Dnstat follows the Dn statistics 
    discussed in Wilks (2011) Statistical Methods in the Atmospheric 
    Sciences text for distribution fitting. 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples that are zero (not used here)
    
    returns:
    
    query_these_indices: the 23 indices into precip_ens_nonzero sampled
    empirical_CDF: query_these_indices / nz
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma parameters (alpha_hat*beta_hat = pmean)
    i1: first index used when evaluating Dnstat
    Dnstat: max abs difference of fitted and empirical CDF from i1 upward
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, + a few 
    #      others at high quantiles.   With few samples, neighboring entries can
    #      land on the same index.
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80]
        
    # ---- extract the quantiles at these indices
        
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    if pflag: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      Builtin float is used for dtype; the np.float alias no longer 
    #      exists in current NumPy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag: print ('   empirical_CDF = ',empirical_CDF)
    if pflag: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi   # log of the mean minus mean of the logs
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, the comparison starts at index i1 (set below) rather than index 0.
    
    if pflag: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag: print ('   fitted_CDF = ', fitted_CDF)
    if pflag: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    # --- only bother evaluating Dn statistic from the sampled quantile whose 
    #     precip amount is nearest 1.0 mm, or from the 35/40th quantile 
    #     (index 17 of query_these_indices), whichever comes first.
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride, \
    Dnstat2, excessive_threshold):
    
    """
    
    Call the gammamix_em routine to estimate the weights and parameters of
    a mixture of two Gamma distributions.  Evaluate how well it fits the 
    empirical data and return weights, parameters, and a fitting statistic.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas
    beta: input and output array of Gamma fitted betas (filled from the
        fit's invbeta and used below as the divisor of the precip amounts)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: array recording how many mixture components are used; 
        [jy,ix] is set to 2 once the fit returns
    fraction_zero: fraction of points with zero precip (unused here)
    nstride: how many grid points to step in each direction (unused here)
    Dnstat2: previous goodness of fit statistic; the saved first guess is 
        only reused when this is below 1.5*excessive_threshold
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, 
    alpha_save_2m, beta_save_2m, nmixture.   The third component of 
    weights/alpha/beta at [jy,ix] is set to 0.0/1.0/1.0.   When anything 
    raises an Exception, two_parameter_fail is True and Dnstat2 is set 
    to 0.10.  KeyboardInterrupt and SystemExit are NOT trapped, so that 
    a long grid-point loop can still be interrupted by the user.
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the R-inspired routine that estimates the weights and a mixture 
        #     of two Gamma distributions. 
        
        if pflag: print ('calling gammamix_em 2 parameter')
        if pflag: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag: print ('weight_save_2m = ', weight_save_2m)
        if pflag: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag: print ('beta_save_2m = ', beta_save_2m)
            
        if ix == 0: 
            # --- first column: no first guess is supplied
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        #elif nmixture[jy,ix-nstride] == 2 and Dnstat2 < excessive_threshold*1.5:
        elif Dnstat2 < excessive_threshold*1.5:
            # --- previous fit was decent; reuse it as the first guess.
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_2m, alpha=alpha_save_2m, invbeta=beta_save_2m,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: # better to restart with new values
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        
        # --- unused third component: zero weight, neutral parameters
        
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        
        # --- keep copies to hand back as the next call's first guess
        
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag: print ('   weights = ', weights[:,jy,ix])
        if pflag: print ('   alpha = ', alpha[:,jy,ix])
        if pflag: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only from the i1 index upward.
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err: # something went wrong ...
        # --- only ordinary errors are treated as a failed fit; a bare
        #     except here would also swallow Ctrl-C and sys.exit().
        print ('   two_parameter_fail')
        if pflag: print ('   reason: ', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride, mop_up,\
    Dnstat3, excessive_threshold):

    """

    Fit a mixture of three Gamma distributions to the positive precipitation
    samples with gammamix_em, store the fitted parameters at grid point
    [jy,ix], and evaluate how closely the fitted CDF matches the empirical CDF.

    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals: precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities paired with empirical_precipvals
    weights: input and output array of Gamma mixture weights, indexed [3, jy, ix]
    alpha: input and output array of Gamma fitted alphas, indexed [3, jy, ix]
    beta: input and output array of Gamma fitted betas, indexed [3, jy, ix]
    i1: first quantile index included in the goodness-of-fit statistic
    pflag: true if diagnostic printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: array of number of mixture components chosen, indexed [jy, ix]
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction (not used here)
    mop_up: flag for whether we're having a 2nd go at things to address points
        where first time through, no good fit.  Forces a fit with no first guess.
    Dnstat3: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action

    returns:
    (three_parameter_fail, weights, alpha, beta, Dnstat3, weight_save_3m,
    alpha_save_3m, beta_save_3m, nmixture).   When an exception occurs,
    three_parameter_fail is True and Dnstat3 is set to 0.10; the saved
    first-guess vectors are returned as they were when the exception occurred,
    and the [jy,ix] entries of the arrays may already have been overwritten if
    the failure occurred after the fit itself.

    """

    three_parameter_fail = False # in case routine bombs off
    try:

        if pflag:
            print ('   Need to try 3 gamma mixture.')
            print ('   calling gammamix_em 3 parameter')
            print ('   precip_ens_nonzero[0], [-1] = ', \
                precip_ens_nonzero[0], precip_ens_nonzero[-1] )
            print ('   weight_save_3m = ', weight_save_3m)
            print ('   alpha_save_3m = ', alpha_save_3m)
            print ('   beta_save_3m = ', beta_save_3m)

        # --- estimate the weights and parameters of the 3-component mixture.
        #     Three cases: (1) start of a row or a mop-up pass: no first guess,
        #     tighter tolerance and more iterations; (2) previous fit was
        #     acceptable (within 1.5x the threshold): reuse its parameters as
        #     the first guess; (3) otherwise restart without a first guess.
        #     The number of mixture components used at the previous grid point
        #     is not consulted here.

        if ix == 0 or mop_up:
            print ('   resetting 3-parameter Gamma mixture')
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.001, maxit=100, maxrestarts=20, verb=False)
        elif Dnstat3 < excessive_threshold*1.5:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_3m, alpha=alpha_save_3m, invbeta=beta_save_3m,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)

        # --- store the fit at this grid point and keep copies to serve as
        #     the first guess for the next call.

        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])

        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag:
            print ('   weights = ', weights[:,jy,ix])
            print ('   alpha = ', alpha[:,jy,ix])
            print ('   beta = ', beta[:,jy,ix])

        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.
        #       Each component is evaluated at the precip amount divided by
        #       its beta, with its alpha as the shape parameter.

        fitted_CDF = 0.0
        for icomp in range(3):
            y = empirical_precipvals / beta[icomp,jy,ix]
            fitted_CDF = fitted_CDF + \
                weights[icomp,jy,ix]*stats.gamma.cdf(y, alpha[icomp,jy,ix])
        if pflag: print ('   fitted_CDF = ',fitted_CDF)

        # ---- calculate a statistic for how far off the fitted CDF is from the
        #      empirical CDF: the maximum absolute difference over the
        #      quantiles from index i1 upward.

        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err:
        # Only ordinary errors count as a failed fit; KeyboardInterrupt and
        # SystemExit propagate so that a long run can still be stopped.
        print ('   three_parameter_fail')
        if pflag: print ('   reason: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10

    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ Pick among the 1-, 2-, and 3-component Gamma fits the one with
        the smallest Dn statistic (the first one on ties), and copy its saved
        parameters into weights, alpha, beta at [jy,ix].  Unused components
        get weight 0.0 and alpha = beta = 1.0.  nmixture[jy,ix] records the
        number of components chosen.  The four arrays are modified in
        place and also returned.
    """

    fit_stats = np.array([Dnstat1, Dnstat2, Dnstat3])
    print ('   Dnstats 1,2,3  = ', fit_stats)
    best = np.argmin(fit_stats)

    if best == 2:
        # ---- all three components are active
        if pflag == True: print ('selected 3-Gamma mixture')
        nmixture[jy,ix] = 3
        beta[:,jy,ix] = beta_save_3m[:]
        alpha[:,jy,ix] = alpha_save_3m[:]
        weights[:,jy,ix] = weight_save_3m[:]
        return weights, alpha, beta, nmixture

    if best == 1:
        # ---- two active components, third is a zero-weight placeholder
        if pflag == True: print ('selected 2-Gamma mixture')
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        weights[0:2,jy,ix] = weight_save_2m[:]
        alpha[0:2,jy,ix] = alpha_save_2m[:]
        beta[0:2,jy,ix] = beta_save_2m[:]
        nmixture[jy,ix] = 2
        return weights, alpha, beta, nmixture

    if best == 0:
        # ---- single Gamma; second and third are zero-weight placeholders
        if pflag == True: print ('selected 1-Gamma mixture')
        weights[1:,jy,ix] = 0.0
        alpha[1:,jy,ix] = 1.0
        beta[1:,jy,ix] = 1.0
        weights[0,jy,ix] = 1.0
        alpha[0,jy,ix] = alpha_save_1m
        beta[0,jy,ix] = beta_save_1m
        nmixture[jy,ix] = 1

    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- inputs from command line

cmonth = sys.argv[1] # 'Jan', 'Feb', etc.
clead = sys.argv[2] # 03, 06, 12, etc.
cdomain = sys.argv[3] # conus, etc.

jmin, jmax, imin, imax = set_domain_boundaries(cdomain)

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
nmembers = 5

# ---- threshold on the Dn statistic above which a fit is deemed
#      unacceptable and a richer mixture is tried.  excessive_threshold
#      inspired by Wilks textbook, Table 5.2, assume for simplicity alpha=1
#      since there is only a weak dependence on alpha.  Defined here, before
#      the grid-point loop, because the mop-up test at the bottom of the loop
#      reads it for every grid point, including ones that never enter the
#      mixture-fitting branch.

excessive_threshold = 0.025

# ---- read in the previously generated netCDF file with precipitation
#      for this month and lead time.  All members, dates for this 
#      month have been smushed into one leading index, dimension
#      nsamps, since the date of the forecast within the month and 
#      the member number is irrelevant for the distribution fitting.
   
ncfile = master_directory + cmonth + '_apcp_h' + clead + '.nc'
if pflag: print (ncfile)
nc = Dataset(ncfile)
precip_ens = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
nsamps, nyin, nxin = np.shape(precip_ens)
lons_1d = nc.variables['lons_fcst'][imin:imax]
lats_1d = nc.variables['lats_fcst'][jmin:jmax]
nc.close()

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float / int are used
#      as dtypes; the np.float / np.int aliases no longer exist in
#      current NumPy releases.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)
Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
        
tktr = 0
weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # placeholder first guess
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_3mop = 0.33333*np.ones((3), dtype=float) # placeholder first guess
alpha_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
nstride = 1
Dnstat1 = 0.10
Dnstat2 = 0.10
Dnstat3 = 0.10

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")
for jy in range(0,nyin,nstride):
        
    for ix in range(0,nxin,nstride):
        
        mop_up = False
        
        # True only when this grid point goes through the 1/2/3-component
        # fitting branch below; the mop-up step is meaningful only then.
        full_fit_attempted = False
        
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('***** begin time, current time, jy, ix, nyin, nxin, lon, lat = ',\
            begin_time, current_time, jy, ix, nyin, nxin, lons_1d[ix], lats_1d[jy])
            
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        #
        #      NOTE(review): as written, tp is the minimum of an absolute
        #      value and 0.0, so it is always 0.0 and the subtraction below
        #      changes nothing.  Behavior is left as is; samples at or below
        #      0.006 (including negatives) are censored in fraczero_possamps.
        #      Confirm whether a shift was actually intended.
        
        precip_ens_1d = precip_ens[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag: print ('   precip_ens_nonzero[-400:-1] = ',\
            precip_ens_nonzero[-400:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag: print ('   number of samples with positive precip = ', nz)
        if pflag: print ('   precip_ens_nonzero[-10:] = ', \
            precip_ens_nonzero[-20:])
        if nz > 40 and precip_ens_nonzero[-1] > 2.0:
        
            full_fit_attempted = True
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras, if there are enough samples.
            #     With smaller number of samples, do the sampling at a fewer
            #     number of quantiles
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  
            
            if pflag: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride, \
                    Dnstat2, excessive_threshold)
                Dnstat2a[jy,ix] = Dnstat2
                if pflag:
                    print ('   two_parameter_fail, Dnstat2 = ', two_parameter_fail, Dnstat2)
                    
                if two_parameter_fail == True or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride, mop_up, \
                        Dnstat3, excessive_threshold)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail == True or Dnstat3 > excessive_threshold: 
                        
                        # ---- none of the three fits is acceptable; fall back
                        #      to whichever has the smallest Dn.
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                            jy, ix, weights, alpha, beta, nmixture, pflag, \
                            Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                            weight_save_1m, alpha_save_2m, beta_save_2m, \
                            weight_save_2m, alpha_save_3m, beta_save_3m, \
                            weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit a simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # mean of the logs (same estimator as in one_parameter_gamma);
                # previously the sum of the logs was used here by mistake.
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1

        # ----- sometimes, by using the previous grid point's estimate as a prior mixture, it can
        #       go off into non-sensical parameter space.   Check for this, and if so, initiate 
        #       a final 3-parameter mixture without using prior grid point's saved values.
        #
        #       The Dnstat arrays are initialized to 0.10 (above the threshold), so
        #       without the full_fit_attempted guard this test would also be true at
        #       every light-precip grid point, where the empirical quantiles and the
        #       saved parameters still belong to an earlier grid point.

        if full_fit_attempted and \
        Dnstat1a[jy,ix] > excessive_threshold and \
        Dnstat2a[jy,ix] > excessive_threshold and \
        Dnstat3a[jy,ix] > excessive_threshold:
        
            mop_up = True
            print ('   mopping up! ')
            three_parameter_fail, weights, alpha, beta, Dnstat3mop,\
                weight_save_3mop, alpha_save_3mop, beta_save_3mop, nmixture = \
                three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                empirical_precipvals, empirical_CDF, weights, \
                alpha, beta, i1, pflag, weight_save_3mop, alpha_save_3mop, \
                beta_save_3mop, nmixture, fraction_zero, nstride, mop_up, \
                Dnstat3, excessive_threshold)

            # ---- adopt the mop-up fit as the 3-component candidate only if 
            #      it improves on the first attempt at this grid point.

            if Dnstat3mop < Dnstat3a[jy,ix]:
                Dnstat3 = Dnstat3mop
                weight_save_3m = weight_save_3mop
                alpha_save_3m = alpha_save_3mop
                beta_save_3m = beta_save_3mop
                                
            weights, alpha, beta, nmixture = decide_which_mixture(\
                jy, ix, weights, alpha, beta, nmixture, pflag, \
                Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                weight_save_1m, alpha_save_2m, beta_save_2m, \
                weight_save_2m, alpha_save_3m, beta_save_3m, \
                weight_save_3m)
            
# --- save to cPickle file.  The arrays are dumped one after another, so a
#     reader must load them back in this same order.

outfile = master_directory + cmonth+'_'+cdomain+'_apcp_gamma_parameters_h' + clead + '.cPick'

print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)
    cPickle.dump(Dnstat1a, ouf)
    cPickle.dump(Dnstat2a, ouf)
    cPickle.dump(Dnstat3a, ouf)
    cPickle.dump(nmixture, ouf)



                                                                                                                                                                                                                                                                                                                   ._CDF_fitting_mswep_precip.py                                                                       000775  000765  000024  00000000471 14016263124 016452  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       9                                      ATTR      9     !                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    =     ( '357' )65528765                                                                                                                                                                                                       CDF_fitting_mswep_precip.py                                                                         000775  000765  000024  00000074677 14016263124 016260  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_mswep_precip.py cmonth cend_hour

this python script is designed to fit a mixture of Gamma CDFs to closely
match an empirical distribution of precipitation.   Up to 3 Gamma distributions
can be mixed, though 1 or 2 is preferable if they provide a sufficiently
close fit.   The script is tailored to MSWEP precipitation analyses over 
one of the National Digital Forecast Database domains for the National 
Blend of Models

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def set_domain_boundaries(cdomain):

    """ Look up the hard-coded bounding indices (jmin, jmax, imin, imax)
        for the named domain: 'conus', 'pr', or 'ak'.  Per the original
        author's note, a grib file of the 2.5-km blend output grid was used
        to determine the bounding lat and lon, and from that the indices of
        the 0.25 GEFSv12 reforecast data that encompass the domain.
        Any other name prints a message and exits via sys.exit().
    """

    known_domains = ( \
        ('conus', (93, 246, 368, 686)), \
        ('pr', (243, 256, 649, 667)), \
        ('ak', (19, 161, 201, 967)) )

    for domain_name, index_bounds in known_domains:
        if cdomain == domain_name:
            jmin, jmax, imin, imax = index_bounds
            return jmin, jmax, imin, imax

    print ('invalid domain.  Exiting.')
    sys.exit()
 
# =====================================================================

def find_nearest(vec, value):

    """ return the index of the element of vector vec whose absolute
    difference from value is smallest """

    distance = np.abs(vec - value)
    nearest = distance.argmin()
    return nearest

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):

    """

    Split the input sample vector precip_ens into "zero" and positive
    parts.  Samples at or below 0.006 mm are censored (counted as zero).
    Each remaining sample gets a uniform random perturbation in
    [-0.005, 0.005) added, since the data was discretized to ~0.1 mm and
    tied amounts are unwanted when later creating CDFs.  The perturbed
    positive samples are sorted in ascending order.

    nsamps is accepted but not used; the sample count is taken from
    len(precip_ens).

    returns: fraction_zero (censored count / total count), the sorted
    perturbed positive samples, and nz, the number of positive samples.

    """

    censor_level = 0.006  # mm; at or below this counts as zero
    half_width = 0.005    # magnitude of the random perturbation

    censored_locations = np.where(precip_ens <= censor_level)
    positives = np.delete(precip_ens, censored_locations)
    nz = len(positives)

    perturbation = np.random.uniform(low=-half_width,high=half_width,size=nz)
    positives = np.sort(positives + perturbation)

    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - len(positives)) / float(ntotal)
    return fraction_zero, positives, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Along with some other ancillary stuff, return the fitted Gamma distribution
    alpha and beta values, along with an evaluation (Dnstat) of how 
    closely the fitted CDF matches the empirical CDF.   Dnstat follows
    the Dn statistics discussed in Wilks (2011) Statistical Methods in
    the Atmospheric Sciences text for distribution fitting. 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples that are zero (not used here)
    
    returns (in this order):
    
    query_these_indices: list of 23 indices into precip_ens_nonzero
    empirical_CDF: query_these_indices / nz, as a float array
    empirical_precipvals: precip_ens_nonzero at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma parameters (alpha_hat*beta_hat = pmean)
    i1: first quantile index included in the Dn statistic
    Dnstat: max absolute difference between fitted and empirical CDF from i1 on
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, + a few 
    #      others at high quantiles.   Each entry is (numerator*nz)//denominator.
    
    quantile_fractions = [ (1,20), (1,10), (3,20), (1,5), (1,4), (3,10), \
        (7,20), (2,5), (9,20), (1,2), (11,20), (3,5), (13,20), \
        (7,10), (3,4), (4,5), (17,20), (35,40), (9,10), \
        (37,40), (19,20), (39,40), (79,80) ]
    query_these_indices = [ (num*nz)//den for num, den in quantile_fractions ]
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    if pflag: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is used as the dtype; the np.float alias no 
    #      longer exists in current NumPy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag: print ('   empirical_CDF = ',empirical_CDF)
    if pflag: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi  # log of the mean minus mean of the logs
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      The fitted CDF is evaluated at the precip amount divided by beta_hat,
    #      with alpha_hat as the shape parameter.
    
    if pflag: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag: print ('   fitted_CDF = ', fitted_CDF)
    if pflag: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    # --- because errors in fit are largely irrelevant for the quantile mapping at very
    #     small values, only evaluate the Dn statistic from the quantile whose precip
    #     amount is closest to 1.0 mm, or from index 17 (the 35/40th quantile),
    #     whichever index is smaller.
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride, \
    Dnstat2, excessive_threshold):
    
    """
    
    Fit a mixture of two Gamma distributions to the positive precipitation
    samples with gammamix_em, store the fitted parameters at grid point
    [jy,ix], and evaluate how closely the fitted CDF matches the empirical CDF.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals: precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities paired with empirical_precipvals
    weights: input and output array of Gamma mixture weights, indexed [3, jy, ix]
    alpha: input and output array of Gamma fitted alphas, indexed [3, jy, ix]
    beta: input and output array of Gamma fitted betas, indexed [3, jy, ix]
    i1: first quantile index included in the goodness-of-fit statistic
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: array of number of mixture components chosen, indexed [jy, ix]
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction
    Dnstat2: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    (two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m,
    alpha_save_2m, beta_save_2m, nmixture).   When an exception occurs,
    two_parameter_fail is True and Dnstat2 is set to 0.10; the saved
    vectors are returned as they were when the exception occurred, and the
    [jy,ix] entries of the arrays may already have been overwritten if the
    failure occurred after the fit itself.
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        if pflag:
            print ('calling gammamix_em 2 parameter')
            print ('precip_ens_nonzero[0], [-1] = ', \
                precip_ens_nonzero[0], precip_ens_nonzero[-1] )
            print ('weight_save_2m = ', weight_save_2m)
            print ('alpha_save_2m = ', alpha_save_2m)
            print ('beta_save_2m = ', beta_save_2m)
        
        # --- call the R-inspired routine that estimates the weights and a mixture 
        #     of two Gamma distributions.  The saved parameters are used as the
        #     first guess only when we are not at the start of a row, the grid 
        #     point nstride to the left ended up with a 2-component mixture, and
        #     the previous fit was acceptable (within 1.5x the threshold).  
        #     Otherwise, better to restart with new values.
        
        use_first_guess = ix != 0 and nmixture[jy,ix-nstride] == 2 and \
            Dnstat2 < excessive_threshold*1.5
        if use_first_guess:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_2m, alpha=alpha_save_2m, invbeta=beta_save_2m,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else:
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        # --- store the fit at this grid point; the unused third component
        #     gets weight 0 and alpha = beta = 1.  Keep copies of the fit to
        #     serve as the first guess for the next call.
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag:
            print ('   weights = ', weights[:,jy,ix])
            print ('   alpha = ', alpha[:,jy,ix])
            print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        #       Each component is evaluated at the precip amount divided by
        #       its beta, with its alpha as the shape parameter.
        
        fitted_CDF = 0.0
        for icomp in range(2):
            y = empirical_precipvals / beta[icomp,jy,ix]
            fitted_CDF = fitted_CDF + \
                weights[icomp,jy,ix]*stats.gamma.cdf(y, alpha[icomp,jy,ix])
        if pflag: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF: the maximum absolute difference over the
        #      quantiles from index i1 upward.
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err: # something went wrong ...
        # Only ordinary errors count as a failed fit; KeyboardInterrupt and
        # SystemExit propagate so that a long run can still be stopped.
        print ('   two_parameter_fail')
        if pflag: print ('   reason: ', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride, mop_up,\
    Dnstat3, excessive_threshold):    
    
    """
    
    Fit a mixture of three Gamma distributions at grid point (jy, ix) with
    gammamix_em, store the fitted parameters in the weights/alpha/beta arrays,
    and measure how far the fitted CDF is from the empirical CDF.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the sampled empirical quantiles
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas (filled from the
        'invbeta' field of the fit and used below as a divisor of the
        precip amounts)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction
    mop_up: flag for whether we're having a 2nd go at things to address points
        where first time through, no good fit.
    Dnstat3: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    three_parameter_fail: True if the fit or its evaluation raised an exception
    weights, alpha, beta: the (in-place updated) parameter arrays
    Dnstat3: max |fitted CDF - empirical CDF| over indices i1 and above,
        or 0.10 if the fit failed
    weight_save_3m, alpha_save_3m, beta_save_3m: fitted values to use as
        the next first guess (unchanged inputs if the fit itself failed)
    nmixture: set to 3 at [jy, ix] only when everything succeeded
    
    Note: if an exception occurs after the parameters were written but
    before the Dn statistic was computed, weights/alpha/beta at [jy, ix]
    already hold the new fit even though three_parameter_fail is True.
    
    """

    three_parameter_fail = False # in case routine bombs off
    try:    
    
        if pflag == True: print ('   Need to try 3 gamma mixture.')
        
        # --- call the routine that estimates the weights and parameters
        #     of a mixture of 3 Gamma distributions.
        
        if pflag == True: print ('   calling gammamix_em 3 parameter')
        if pflag == True: print ('   precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('   weight_save_3m = ', weight_save_3m)
        if pflag == True: print ('   alpha_save_3m = ', alpha_save_3m)
        if pflag == True: print ('   beta_save_3m = ', beta_save_3m)
            
        if ix == 0 or mop_up == True:
            # start of a row, or a mop-up pass: no first guess, and a
            # tighter tolerance with more iterations.
            print ('   resetting 3-parameter Gamma mixture')
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.001, maxit=100, maxrestarts=20, verb=False)
        elif nmixture[jy,ix-nstride] == 3 and Dnstat3 < excessive_threshold*1.5:
            # the previous point in this row used 3 components and fit
            # acceptably, so seed the fit with its saved parameters.
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_3m, alpha=alpha_save_3m, invbeta=beta_save_3m,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: 
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=3, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)   
         
        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])
                
        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])
        
        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        y2 = empirical_precipvals / beta[2,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF2 = stats.gamma.cdf(y2, alpha[2,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1 + \
            weights[2,jy,ix]*fitted_CDF2
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err:
        # Catch Exception rather than everything, so that Ctrl-C and
        # sys.exit() still stop a long run instead of being recorded
        # as a failed fit.
        print ('   three_parameter_fail')
        if pflag == True: print ('   exception was: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10    
    
    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ Pick the 1-, 2- or 3-component Gamma mixture with the smallest
        Dn statistic and write its saved parameters into
        weights/alpha/beta/nmixture at grid point (jy, ix).

        Unused component slots are filled with weight 0.0 and
        alpha = beta = 1.0.  The single-Gamma weight is always 1.0
        (weight_save_1m is accepted but not read).  The four arrays
        are modified in place and also returned.
    """

    fit_stats = np.array([Dnstat1, Dnstat2, Dnstat3])
    print ('   Dnstats 1,2,3  = ', fit_stats)

    # number of mixture components of the best-fitting candidate
    ncomponents = int(np.argmin(fit_stats)) + 1

    if ncomponents == 3:
        if pflag == True: print ('selected 3-Gamma mixture')
        weights[:,jy,ix] = weight_save_3m[:]
        alpha[:,jy,ix] = alpha_save_3m[:]
        beta[:,jy,ix] = beta_save_3m[:]
    elif ncomponents == 2:
        if pflag == True: print ('selected 2-Gamma mixture')
        weights[0:2,jy,ix] = weight_save_2m[:]
        alpha[0:2,jy,ix] = alpha_save_2m[:]
        beta[0:2,jy,ix] = beta_save_2m[:]
        # third slot is unused
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
    else:
        if pflag == True: print ('selected 1-Gamma mixture')
        weights[0,jy,ix] = 1.0
        alpha[0,jy,ix] = alpha_save_1m
        beta[0,jy,ix] = beta_save_1m
        # second and third slots are unused
        weights[1:,jy,ix] = 0.0
        alpha[1:,jy,ix] = 1.0
        beta[1:,jy,ix] = 1.0

    nmixture[jy,ix] = ncomponents
    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- command-line inputs: the month ('01', '02', ...) and the end
#      hour ('06', '12', '18', '00') of the 6-h period

cmonth, cend_hour = sys.argv[1], sys.argv[2]
imonth = int(cmonth) - 1  # zero-based index into the days-per-month tables

# ---- fixed run settings

nstride = 10  # grid-point step used by the fitting loops
cdomain = 'conus'
excessive_threshold = 0.035
pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/mswep/'

# ---- days per month.  NOTE(review): the "leap" table is identical to
#      the regular one (February = 28 in both), so Feb 29 is never
#      read; the read loop and nsamps rely on that -- confirm intent.

ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]

# ---- total number of daily precipitation samples across all 
#      years for this month

nsamps = ndaysomo[imonth]*20 if imonth == 1 else \
    6*ndaysomo_leap[imonth] + 14*ndaysomo[imonth]
print ('nsamps = ', nsamps)

# ---- load the precipitation analyses for this month and end hour from
#      the previously generated yearly netCDF files (2000-2019).  Every
#      date is stacked along one leading sample index of length nsamps,
#      since the date within the month is irrelevant for the
#      distribution fitting.
   
ktr = 0  # next free slot along the sample axis
for iyear in range(2000,2020):
    ndays = ndaysomo_leap[imonth] if iyear%4 == 0 else ndaysomo[imonth]
    cyear = str(iyear)
    infile = master_directory + cyear + cmonth + '_on_ndfd_grid_6hourly.nc'
    print (iyear, infile)
    nc = Dataset(infile)
    yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
    for iday in range(1,ndays+1):
        cday = str(iday).zfill(2)  # two-digit day of month
        iyyyymmddhh = int(cyear+cmonth+cday+cend_hour)
        
        # ---- pull the analysis whose period ends at this date/hour
        
        idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
        precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
        if iyear == 2000 and iday == 1:
        
            # ---- first field read: now the grid size is known, so 
            #      allocate the time series and grab the lat/lon arrays
            
            nyin, nxin = np.shape(precip_in)
            precip_tseries = np.zeros((nsamps,nyin,nxin), dtype=np.float32)
            lons = nc.variables['lons'][:,:]
            lats = nc.variables['lats'][:,:]
        precip_tseries[ktr,:,:] = precip_in[:,:]
        ktr += 1
    nc.close()

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float/int are used
#      as dtypes because the np.float / np.int aliases no longer
#      exist in current NumPy releases.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)

# ---- goodness-of-fit statistics per grid point, preset to 0.10
#      (the same value the fitting routines return on failure)

Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
        
tktr = 0

# ---- placeholder first guesses for the 2- and 3-component mixtures;
#      they are replaced by fitted values as grid points are processed.

weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # placeholder values
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_3mop = 0.33333*np.ones((3), dtype=float) # placeholder values
alpha_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
Dnstat1 = 0.10
Dnstat2 = 0.10
Dnstat3 = 0.10

# ---- main fitting loop: visit every nstride-th grid point, fit 1, 2,
#      or 3 Gamma components as needed, and store the chosen parameters.

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")
for jy in range(0,nyin,nstride):
#for jy in range(60, 61):
        
    for ix in range(0,nxin,nstride):
    #for ix in range (200,251):
        
        mop_up = False
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('***** begin time, current time, jy, ix, nyin, nxin, lon, lat = ',\
            begin_time, current_time, jy, ix, nyin, nxin, lons[jy, ix], lats[jy,ix])
            
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip
        #
        #      NOTE(review): as written tp is min(|min precip|, 0.0), which
        #      is 0.0 for any finite data, so nothing is subtracted.  The
        #      intended offset is unclear; left unchanged -- confirm.
        
        precip_ens_1d = precip_tseries[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag == True: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag == True: print ('   precip_ens_nonzero[0:-1] = ',\
            precip_ens_nonzero[0:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag == True: print ('   After fraczero_possamps, jy,ix = ', \
            jy,ix, nxin*nyin, current_time)
        if pflag == True: print ('   number of samples with positive precip = ', nz)
        if pflag == True: print ('   precip_ens_nonzero[-20:] = ', \
            precip_ens_nonzero[-20:])
        if nz > 20 and precip_ens_nonzero[-1] > 2.0:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras, if there are enough samples.
            #     With smaller number of samples, do the sampling at a fewer
            #     number of quantiles
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  excessive_threshold inspired 
            #      by Wilks textbook, Table 5.2, assume for simplicity alpha=1
            #      since there is only a weak dependence on alpha.
            #
            #      NOTE(review): this overrides the value assigned to
            #      excessive_threshold before the loop; from the first
            #      point that reaches here onward, 0.025 is in effect.
            
            excessive_threshold = 0.025
            if pflag == True: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride, \
                    Dnstat2, excessive_threshold)
                Dnstat2a[jy,ix] = Dnstat2
                if pflag == True:
                    print ('   two_parameter_fail, Dnstat2 = ', two_parameter_fail, Dnstat2)
                    
                if two_parameter_fail == True or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride, mop_up, \
                        Dnstat3, excessive_threshold)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail == True or Dnstat3 > excessive_threshold: 
                        
                        # ---- none of the fits is under the threshold; 
                        #      fall back to whichever has the lowest Dn.
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                            jy, ix, weights, alpha, beta, nmixture, pflag, \
                            Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                            weight_save_1m, alpha_save_2m, beta_save_2m, \
                            weight_save_2m, alpha_save_3m, beta_save_3m, \
                            weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit a simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag == True: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                # mean (not sum) of the logs, as in one_parameter_gamma
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1
            # a fictitious value, but precip so low, it doesn't matter.  It is
            # stored in the per-grid-point array (the scalar Dnstat1 cannot be
            # indexed) so that the mop-up test below is skipped for this point.
            Dnstat1a[jy,ix] = 0.01
            

        # ----- sometimes, by using the previous grid point's estimate as a prior mixture, it can
        #       go off into non-sensical parameter space.   Check Dn for this, and if so, initiate 
        #       a final 3-parameter mixture *without* using prior grid point's saved values.

        if Dnstat1a[jy,ix] > excessive_threshold and \
        Dnstat2a[jy,ix] > excessive_threshold and \
        Dnstat3a[jy,ix] > excessive_threshold:
        
            mop_up = True
            print ('   mopping up! ')
            three_parameter_fail, weights, alpha, beta, Dnstat3mop,\
                weight_save_3mop, alpha_save_3mop, beta_save_3mop, nmixture = \
                three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                empirical_precipvals, empirical_CDF, weights, \
                alpha, beta, i1, pflag, weight_save_3mop, alpha_save_3mop, \
                beta_save_3mop, nmixture, fraction_zero, nstride, mop_up, \
                Dnstat3, excessive_threshold)

            # ---- keep the mop-up fit only if it beats the earlier 
            #      3-component fit at this point
            
            if Dnstat3mop < Dnstat3a[jy,ix]:
                Dnstat3 = Dnstat3mop
                weight_save_3m = weight_save_3mop
                alpha_save_3m = alpha_save_3mop
                beta_save_3m = beta_save_3mop
                                
            weights, alpha, beta, nmixture = decide_which_mixture(\
                jy, ix, weights, alpha, beta, nmixture, pflag, \
                Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                weight_save_1m, alpha_save_2m, beta_save_2m, \
                weight_save_2m, alpha_save_3m, beta_save_3m, \
                weight_save_3m)
            
# --- save to cPickle file.  The arrays are dumped one after another,
#     so a reader must load them back in this same order:
#     weights, alpha, beta, fzero, Dnstat1a, Dnstat2a, Dnstat3a, nmixture

outfile = master_directory + cmonth+'_'+cdomain+'_MSWEP_apcp_gamma_parameters_h' + cend_hour + '.cPick'
#outfile = master_directory + cmonth+'_'+cdomain+'_apcp_gamma_parameters_test_h' + cend_hour + '.cPick'

print ('writing to ', outfile)
# 'with' closes the file even if one of the dumps raises
with open(outfile, 'wb') as ouf:
    cPickle.dump(weights, ouf)
    cPickle.dump(alpha, ouf)
    cPickle.dump(beta, ouf)
    cPickle.dump(fzero, ouf)
    cPickle.dump(Dnstat1a, ouf)
    cPickle.dump(Dnstat2a, ouf)
    cPickle.dump(Dnstat3a, ouf)
    cPickle.dump(nmixture, ouf)



                                                                 ._CDF_fitting_mswep_precip_byjindex.py                                                              000775  000765  000024  00000000474 14016263124 020351  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  
     <                                      ATTR      <     $                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      7     com.macromates.visibleIndex  `         ( '358' )682:3618492                                                                                                                                                                                                    CDF_fitting_mswep_precip_byjindex.py                                                                000775  000765  000024  00000075716 14016263124 020147  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_mswep_precip_byjindex.py cmonth cend_hour cjstart cjend

suggest splitting up into 20 jobs, 80 gps each 
this python script is designed to fit a mixture of Gamma CDFs to closely
match an empirical distribution of precipitation.   Up to 3 Gamma distributions
can be mixed, though 1 or 2 is preferable if they provide a sufficiently
close fit.   The script is tailored to MSWEP precipitation analyses over 
one of the National Digital Forecast Database domains for the National 
Blend of Models

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em

# =====================================================================

def set_domain_boundaries(cdomain):
    
    """ Return the bounding indices (jmin, jmax, imin, imax) for the 
        requested domain: 'conus', 'pr', or 'ak'.  Per the original 
        author, they were determined from a grib file of the 2.5-km blend 
        output grid and bracket the domain on the 0.25 GEFSv12 reforecast
        grid.  Any other domain name prints a message and exits.
    """
    
    # (domain name, (jmin, jmax, imin, imax))
    known_domains = ( \
        ('conus', (93, 246, 368, 686)), \
        ('pr', (243, 256, 649, 667)), \
        ('ak', (19, 161, 201, 967)) )
        
    for domain_name, bounds in known_domains:
        if cdomain == domain_name:
            jmin, jmax, imin, imax = bounds
            return jmin, jmax, imin, imax
            
    print ('invalid domain.  Exiting.')     
    sys.exit()    
 
# =====================================================================

def find_nearest(vec, value):
    
    """ return the index of the element of vector vec that lies 
    closest to value (the first such index if there are ties)"""
    
    distance = np.abs(vec-value)
    return distance.argmin()

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    
    """
    
    Split the sample vector precip_ens into "zero" and positive amounts.
    Values at or below 0.006 mm are censored and counted as zero.  Each
    remaining value gets a uniform random perturbation in [-0.005, 0.005)
    because the data was discretized to ~0.1 mm; this avoids lots of tied
    amounts when CDFs are built later.  
    
    nsamps is accepted but not used; the sample count is taken from
    precip_ens itself.
    
    Returns (fraction_zero, precip_ens_nonzero, nz): the fraction of
    censored samples, the sorted perturbed positive amounts, and how
    many of them there are.
    
    """
    
    ntotal = len(precip_ens)
    
    # censor at 0.006 mm
    censored = np.where(precip_ens <= 0.006)
    positive = np.delete(precip_ens, censored)
    nz = len(positive)
    
    # data discretized, so add random component of this magnitude
    jitter = np.random.uniform(low=-0.005,high=0.005,size=nz) 
    precip_ens_nonzero = np.sort(positive + jitter)  
    
    fraction_zero = float(ntotal - nz) / float(ntotal)
    return fraction_zero, precip_ens_nonzero, nz

# =====================================================================

def one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero):

    """ 
    Fit a single Gamma distribution to the nonzero precipitation samples
    and evaluate (Dnstat) how closely the fitted CDF matches the empirical
    CDF.   Dnstat follows the Dn statistics discussed in Wilks (2011) 
    Statistical Methods in the Atmospheric Sciences text for distribution 
    fitting. 
    
    inputs:  
    
    nz: number of nonzero samples
    precip_ens_nonzero[nz]:   sorted vector of nonzero precip amounts (mm)
    pflag: print flag, true if diagnostic printing desired
    fraction_zero: fraction of samples that are zero (not used here)
    
    returns (in this order):
    
    query_these_indices: list of 24 indices into precip_ens_nonzero
    empirical_CDF: query_these_indices / nz
    empirical_precipvals: precip amounts at query_these_indices
    pmean: mean of precip_ens_nonzero
    alpha_hat, beta_hat: fitted Gamma parameters (alpha_hat*beta_hat = pmean)
    i1: first index of the quantile vectors included in the Dn statistic
    Dnstat: max |fitted CDF - empirical CDF| over indices i1 and above
    weight_save_1m, alpha_save_1m, beta_save_1m: 1.0, alpha_hat, beta_hat
    
    """

    # ---- define the indices in the previously sorted precip_ens_nonzero vector
    #      that are at the 0.05, 0.10, .... , 0.95 quantiles of the sorted vector, + a few 
    #      others at high quantiles.   With small nz several entries coincide.
    
    query_these_indices = [ nz//20, nz//10, (3*nz)//20, nz//5, nz//4, (3*nz)//10, \
        (7*nz)//20, (2*nz)//5, (9*nz)//20, nz//2, (11*nz)//20, (3*nz)//5, (13*nz)//20, \
        (7*nz)//10, (3*nz)//4, (4*nz)//5, (17*nz)//20, (35*nz)//40,(9*nz)//10, \
        (37*nz)//40, (19*nz)//20, (39*nz)//40, (79*nz)//80, (159*nz)//160]
        
    # ---- extract the quantiles at these indices
    
    empirical_precipvals = precip_ens_nonzero[query_these_indices] 
    if pflag == True: print ('   nz, query_these_indices = ', nz, query_these_indices)
    
    # ---- convert the query_these_indices into the cumulative probability.
    #      The builtin float is the dtype because the np.float alias no 
    #      longer exists in current NumPy releases.
    
    empirical_CDF = np.array(query_these_indices, dtype=float) / float(nz) 
    
    if pflag == True: print ('   empirical_CDF = ',empirical_CDF)
    if pflag == True: print ('   empirical_precipvals = ', empirical_precipvals)
    
    # ---- See Wilks Statistical Meteorology Text, section on Gamma Distribution.   Use the
    #      Thom (1958) method of maximum-likelihood estimator, and from this estimate
    #      the Gamma distribution Parameters.   Ref: Statistical Methods in the Atmospheric
    #      Sciences (3rd Ed), 2011, Daniel S. Wilks (Academic Press)
    
    pmean = np.mean(precip_ens_nonzero)
    lnxbar = np.log(pmean)
    meanlnxi = np.mean(np.log(precip_ens_nonzero))
    D = lnxbar - meanlnxi
    alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
    beta_hat = pmean / alpha_hat
    
    # ---- Now we evaluate how good the fitted distribution matches the empirical one.
    #      Because errors in fit are largely irrelevant for the quantile mapping at very small
    #      values, the low end of the distribution is left out of the evaluation (see i1).
    
    if pflag == True: print ('   D = ',D,' alpha_hat = ', alpha_hat,' beta_hat = ', beta_hat)
    y0 = empirical_precipvals / beta_hat
    fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
    if pflag == True: print ('   fitted_CDF = ', fitted_CDF)
    if pflag == True: print ('   CDF differences: ', np.abs(fitted_CDF - empirical_CDF))
    
    # --- only bother evaluating Dn statistic from index i1 upward, where i1 is the
    #     smaller of: the index of the queried quantile closest to 1.0 mm, or 
    #     index 17 (the 35/40th quantile).
    
    i1 = np.min([17,np.argmin(np.abs(empirical_precipvals-1.0))])
    Dnstat = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
    print ('   Dnstat for 1 gamma mixture = ', Dnstat)
    weight_save_1m = 1.0
    alpha_save_1m = alpha_hat
    beta_save_1m = beta_hat
    
    return query_these_indices, empirical_CDF, empirical_precipvals, \
        pmean, alpha_hat, beta_hat, i1, Dnstat, weight_save_1m, \
        alpha_save_1m, beta_save_1m
        
# =====================================================================

def two_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_2m, \
    alpha_save_2m, beta_save_2m, nmixture, fraction_zero, nstride, \
    Dnstat2, excessive_threshold):
    
    """
    
    Fit a mixture of two Gamma distributions at grid point (jy, ix) with
    gammamix_em, store the fitted parameters in the weights/alpha/beta arrays,
    and measure how far the fitted CDF is from the empirical CDF.
    
    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the sampled empirical quantiles
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights
    alpha: input and output array of Gamma fitted alphas
    beta: input and output array of Gamma fitted betas (filled from the
        'invbeta' field of the fit and used below as a divisor of the
        precip amounts)
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_2m: 2d-vector of saved weights from last call
    alpha_save_2m: 2d-vector of saved alpha estimates from last call
    beta_save_2m: 2d-vector of saved beta estimates from last call
    nmixture: indicates how many parameters are used in the mix
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction
    Dnstat2: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action
    
    returns:
    two_parameter_fail: True if the fit or its evaluation raised an exception
    weights, alpha, beta: the (in-place updated) parameter arrays; the
        third component slot is set to weight 0.0, alpha = beta = 1.0
    Dnstat2: max |fitted CDF - empirical CDF| over indices i1 and above,
        or 0.10 if the fit failed
    weight_save_2m, alpha_save_2m, beta_save_2m: fitted values to use as
        the next first guess (unchanged inputs if the fit itself failed)
    nmixture: set to 2 at [jy, ix] as soon as the fit itself succeeds
    
    Note: if an exception occurs after the parameters were written but
    before the Dn statistic was computed, weights/alpha/beta/nmixture at
    [jy, ix] already hold the new fit even though two_parameter_fail is True.
    
    """

    two_parameter_fail = False # in case routine bombs off
    try:
        
        # --- call the R-inspired routine that estimates the weights and a mixture 
        #     of two Gamma distributions. 
        
        if pflag == True: print ('calling gammamix_em 2 parameter')
        if pflag == True: print ('precip_ens_nonzero[0], [-1] = ', \
            precip_ens_nonzero[0], precip_ens_nonzero[-1] )
        if pflag == True: print ('weight_save_2m = ', weight_save_2m)
        if pflag == True: print ('alpha_save_2m = ', alpha_save_2m)
        if pflag == True: print ('beta_save_2m = ', beta_save_2m)
            
        if ix == 0: 
            # start of a row: no neighbor to borrow a first guess from
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        elif nmixture[jy,ix-nstride] == 2 and Dnstat2 < excessive_threshold*1.5:
            # the previous point in this row used 2 components and fit
            # acceptably, so seed the fit with its saved parameters.
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=weight_save_2m, alpha=alpha_save_2m, invbeta=beta_save_2m,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        else: # better to restart with new values
            best_result = gammamix_em(precip_ens_nonzero, \
                mix_prop=None, alpha=None, invbeta=None,\
                k=2, epsilon=0.003, maxit=60, maxrestarts=20, verb=False)
        
        params = best_result.params
        alpha[0:2,jy,ix] = np.array(params.alpha[:])
        beta[0:2,jy,ix] = np.array(params.invbeta[:])
        weights[0:2,jy,ix] = np.array(params.mix_prop[:])
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        weight_save_2m = np.array(params.mix_prop[:])
        alpha_save_2m = np.array(params.alpha[:])
        beta_save_2m = np.array(params.invbeta[:])
        
        if pflag == True: print ('   weights = ', weights[:,jy,ix])
        if pflag == True: print ('   alpha = ', alpha[:,jy,ix])
        if pflag == True: print ('   beta = ', beta[:,jy,ix])
                
        #  ---- estimate a CDF from a weighted mixture of the two distributions.  
        
        y0 = empirical_precipvals / beta[0,jy,ix]
        y1 = empirical_precipvals / beta[1,jy,ix]
        fitted_CDF0 = stats.gamma.cdf(y0, alpha[0,jy,ix])
        fitted_CDF1 = stats.gamma.cdf(y1, alpha[1,jy,ix])
        fitted_CDF = weights[0,jy,ix]*fitted_CDF0 + weights[1,jy,ix]*fitted_CDF1
        if pflag == True: print ('   fitted_CDF = ',fitted_CDF)
        
        # ---- calculate a statistic for how far off the fitted CDF is from the 
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)
        
        Dnstat2 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag == True: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 2 gamma mixture = ', Dnstat2)
    except Exception as err: # something went wrong ...
        # Catch Exception rather than everything, so that Ctrl-C and
        # sys.exit() still stop a long run instead of being recorded
        # as a failed fit.
        print ('   two_parameter_fail')
        if pflag == True: print ('   exception was: ', repr(err))
        two_parameter_fail = True
        Dnstat2 = 0.10

    return two_parameter_fail, weights, alpha, beta, Dnstat2, weight_save_2m, \
        alpha_save_2m, beta_save_2m, nmixture

    
# =====================================================================

def three_parameter_gamma(jy, ix, precip_ens_nonzero, empirical_precipvals, \
    empirical_CDF, weights, alpha, beta, i1, pflag, weight_save_3m, \
    alpha_save_3m, beta_save_3m, nmixture, fraction_zero, nstride, mop_up,\
    Dnstat3, excessive_threshold):

    """

    Call an R-inspired gamma mixture routine (gammamix_em) to estimate the
    weights and parameters of a mixture of three Gamma distributions at grid
    point (jy, ix).  Evaluate how well the mixture fits the empirical CDF and
    return weights, parameters, and a fitting statistic.

    inputs:
    jy, ix: grid indices
    precip_ens_nonzero: vector of nonzero precip values
    empirical_precipvals : precip amounts at the empirical quantiles
    empirical_CDF: cumulative probabilities matching empirical_precipvals
    weights: input and output array of Gamma mixture weights [3, ny, nx]
    alpha: input and output array of Gamma fitted alphas [3, ny, nx]
    beta: input and output array of Gamma fitted betas [3, ny, nx]
    i1: index of minimum quantile to evaluate when determining goodness of fit.
    pflag: true if printing desired
    weight_save_3m: first guess for weights from previous call
    alpha_save_3m: first guess for alpha
    beta_save_3m: first guess for beta
    nmixture: input and output array [ny, nx] with the number of mixture
        components used at each grid point
    fraction_zero: fraction of points with zero precip (not used here)
    nstride: how many grid points to step in each direction
    mop_up: flag for whether we're having a 2nd go at things to address points
        where first time through, no good fit.
    Dnstat3: previous grid point's goodness of fit statistic
    excessive_threshold: threshold value for Dnstat that triggers action

    returns:
    three_parameter_fail: True if anything in the fit or evaluation raised
    weights, alpha, beta: the input arrays, updated in place at [:, jy, ix]
    Dnstat3: max |fitted CDF - empirical CDF| over quantiles i1 onward, or
        the placeholder 0.10 when the fit failed
    weight_save_3m, alpha_save_3m, beta_save_3m: the newly fitted values (to
        seed the next call), or the unchanged inputs when the fit failed
    nmixture: the input array, with [jy, ix] set to 3 on success

    """

    three_parameter_fail = False # in case routine bombs off
    try:

        if pflag:
            print ('   Need to try 3 gamma mixture.')
            print ('   calling gammamix_em 3 parameter')
            print ('   precip_ens_nonzero[0], [-1] = ', \
                precip_ens_nonzero[0], precip_ens_nonzero[-1] )
            print ('   weight_save_3m = ', weight_save_3m)
            print ('   alpha_save_3m = ', alpha_save_3m)
            print ('   beta_save_3m = ', beta_save_3m)

        # --- choose how to start the EM iteration:
        #     (a) first point in a row, or a mop-up pass: no first guess and
        #         the tightest convergence settings;
        #     (b) the point nstride to the left used a 3-component mixture
        #         and the previous fit was acceptable: reuse the saved
        #         parameters as a first guess;
        #     (c) otherwise: no first guess, looser convergence settings.
        #     The ix >= nstride test keeps ix-nstride from becoming a
        #     negative index, which would silently wrap around to a grid
        #     point at the far end of the row.

        no_first_guess = dict(mix_prop=None, alpha=None, invbeta=None)
        if ix == 0 or mop_up:
            print ('   resetting 3-parameter Gamma mixture')
            first_guess = no_first_guess
            em_control = dict(epsilon=0.001, maxit=100)
        elif ix >= nstride and nmixture[jy,ix-nstride] == 3 \
        and Dnstat3 < excessive_threshold*1.5:
            first_guess = dict(mix_prop=weight_save_3m, \
                alpha=alpha_save_3m, invbeta=beta_save_3m)
            em_control = dict(epsilon=0.003, maxit=60)
        else:
            first_guess = no_first_guess
            em_control = dict(epsilon=0.003, maxit=60)

        best_result = gammamix_em(precip_ens_nonzero, k=3, \
            maxrestarts=20, verb=False, **first_guess, **em_control)

        # --- store the fitted parameters for this grid point and keep
        #     copies that can seed the next call.

        params = best_result.params
        alpha[:,jy,ix] = np.array(params.alpha[:])
        beta[:,jy,ix] = np.array(params.invbeta[:])
        weights[:,jy,ix] = np.array(params.mix_prop[:])

        weight_save_3m = np.array(params.mix_prop[:])
        alpha_save_3m = np.array(params.alpha[:])
        beta_save_3m = np.array(params.invbeta[:])

        if pflag:
            print ('   weights = ', weights[:,jy,ix])
            print ('   alpha = ', alpha[:,jy,ix])
            print ('   beta = ', beta[:,jy,ix])

        #  ---- estimate a CDF from a weighted mixture of the 3 distributions.
        #       Each component is a unit-scale Gamma CDF evaluated at the
        #       precip amount divided by that component's beta.

        fitted_CDF = 0.0
        for k in range(3):
            component_CDF = stats.gamma.cdf( \
                empirical_precipvals / beta[k,jy,ix], alpha[k,jy,ix])
            fitted_CDF = fitted_CDF + weights[k,jy,ix]*component_CDF
        if pflag: print ('   fitted_CDF = ',fitted_CDF)

        # ---- calculate a statistic for how far off the fitted CDF is from the
        #      empirical CDF.   Do this only for either the higher quantiles of the
        #      distribution or where the quantile exceeds 1 mm (the i1 index)

        Dnstat3 = np.max(np.abs(fitted_CDF[i1:] - empirical_CDF[i1:]))
        if pflag: print ('   np.abs(fitted_CDF - empirical_CDF) = ', \
            np.abs(fitted_CDF - empirical_CDF))
        print ('   Dnstat for 3 gamma mixture = ', Dnstat3)
        nmixture[jy,ix] = 3
    except Exception as err:
        # Only ordinary errors are treated as a failed fit; KeyboardInterrupt
        # and SystemExit propagate so a long run can still be stopped.
        # Dnstat3 = 0.10 is the placeholder the caller sees for a failed fit.
        print ('   three_parameter_fail')
        if pflag: print ('   reason: ', repr(err))
        three_parameter_fail = True
        Dnstat3 = 0.10

    return three_parameter_fail, weights, alpha, beta, Dnstat3, \
        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture
    
# =====================================================================

def decide_which_mixture(jy, ix, weights, alpha, beta, nmixture, pflag, \
    Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
    weight_save_1m, alpha_save_2m, beta_save_2m, \
    weight_save_2m, alpha_save_3m, beta_save_3m, \
    weight_save_3m):

    """ Compare the Dn statistics of the 1-, 2- and 3-component Gamma
        fits and store the parameters of the fit with the lowest Dn at
        grid point (jy, ix).

        weights, alpha, beta ([3, ny, nx]) and nmixture ([ny, nx]) are
        modified in place and also returned.  Components that the chosen
        mixture does not use get weight 0.0 and alpha = beta = 1.0.
        weight_save_1m is accepted but not used: a single Gamma always
        gets weight 1.0.
    """

    verbose = (pflag == True)
    Dnstats = np.array([Dnstat1, Dnstat2, Dnstat3])
    print ('   Dnstats 1,2,3  = ', Dnstats)
    best = np.argmin(Dnstats)  # ties go to the simpler mixture

    if best == 2:
        # all three components are in use, nothing to pad
        if verbose: print ('selected 3-Gamma mixture')
        weights[:,jy,ix] = weight_save_3m[:]
        alpha[:,jy,ix] = alpha_save_3m[:]
        beta[:,jy,ix] = beta_save_3m[:]
        nmixture[jy,ix] = 3
        return weights, alpha, beta, nmixture

    if best == 1:
        # two active components; the third is a zero-weight placeholder
        if verbose: print ('selected 2-Gamma mixture')
        weights[0:2,jy,ix] = weight_save_2m[:]
        alpha[0:2,jy,ix] = alpha_save_2m[:]
        beta[0:2,jy,ix] = beta_save_2m[:]
        weights[2,jy,ix] = 0.0
        alpha[2,jy,ix] = 1.0
        beta[2,jy,ix] = 1.0
        nmixture[jy,ix] = 2
        return weights, alpha, beta, nmixture

    if best == 0:
        # single Gamma; the remaining components are zero-weight placeholders
        if verbose: print ('selected 1-Gamma mixture')
        weights[0,jy,ix] = 1.0
        alpha[0,jy,ix] = alpha_save_1m
        beta[0,jy,ix] = beta_save_1m
        weights[1:,jy,ix] = 0.0
        alpha[1:,jy,ix] = 1.0
        beta[1:,jy,ix] = 1.0
        nmixture[jy,ix] = 1

    return weights, alpha, beta, nmixture
    
# =====================================================================
    
# ---- command-line arguments: month, end hour of the 6-h period, and the
#      start/end values of the jy (grid row) range handled by this run

cmonth, cend_hour = sys.argv[1], sys.argv[2]  # e.g. '01' and '06'/'12'/'18'/'00'
cjstart, cjend = sys.argv[3], sys.argv[4]
imonth = int(cmonth) - 1  # zero-based month index
ijstart, ijend = int(cjstart), int(cjend)

# ---- fixed settings for this script

nstride = 10
cdomain = 'conus'
excessive_threshold = 0.035
pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/mswep/'

# ---- days per month.
#      NOTE(review): ndaysomo_leap is identical to ndaysomo (February has
#      28 days in both) -- confirm that 29 Feb is meant to be left out.

ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]

# ---- total number of daily precipitation samples across all
#      20 years for this month

if imonth != 1:
    nsamps = 6*ndaysomo_leap[imonth] + 14*ndaysomo[imonth]
else:  # Feb
    nsamps = ndaysomo[imonth]*20
print ('nsamps = ', nsamps)

# ---- load the precipitation analyses for this month from the yearly
#      netCDF files (2000-2019).  Every day of the month, for every year,
#      is stacked along one leading sample index of length nsamps; the
#      date within the month is irrelevant for the distribution fitting.
#      The storage array and the lat/lon grids are set up on the very
#      first sample, once the grid shape is known.

ktr = 0
for iyear in range(2000,2020):
    cyear = str(iyear)
    ndays = ndaysomo_leap[imonth] if iyear%4 == 0 else ndaysomo[imonth]
    infile = master_directory + cyear + cmonth + '_on_ndfd_grid_6hourly.nc'
    print (iyear, infile)
    nc = Dataset(infile)
    yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
    for iday in range(1,ndays+1):
        cday = str(iday).zfill(2)  # two-digit day of month
        iyyyymmddhh = int(str(iyear)+cmonth+cday+cend_hour)

        # pick out the record whose period-end time matches this date/hour
        idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
        precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])

        if (iyear, iday) == (2000, 1):
            nyin, nxin = np.shape(precip_in)
            precip_tseries = np.zeros((nsamps,nyin,nxin), dtype=np.float32)
            lons = nc.variables['lons'][:,:]
            lats = nc.variables['lats'][:,:]

        precip_tseries[ktr,:,:] = precip_in[:,:]
        ktr += 1
    nc.close()

# ---- more initialization of output storage arrays now that 
#      we know the array dimensions.  The builtin float / int are used as
#      dtypes: the np.float / np.int aliases they replace were removed in
#      NumPy 1.24 and meant exactly these builtin types.

weights = np.zeros((3,nyin, nxin), dtype=float)
alpha = np.zeros((3,nyin,nxin), dtype=float)
beta = np.zeros((3,nyin,nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
nmixture = np.zeros((nyin, nxin), dtype=int)

# Dn statistics start at 0.10, the same placeholder the fitting
# routines return when a fit fails.
Dnstat1a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat2a = 0.10*np.ones((nyin, nxin), dtype=float)
Dnstat3a = 0.10*np.ones((nyin, nxin), dtype=float)

weight_save_1m = 1.0
alpha_save_1m = 1.0
beta_save_1m = 1.0
   
# ---- loop over the grid points and estimate the Gamma distributions
#      for each parameter.  First see if a single Gamma distribution
#      is appropriate; if not, try a mixture of two.   If that still
#      doesn't fit well, try a mixture of three.   
#      The *_save_* variables carry first-guess parameters from one grid
#      point to the next; their initial values here are arbitrary.
        
tktr = 0
weight_save_2m = 0.5*np.ones((2), dtype=float)
alpha_save_2m = np.random.uniform(low=0.5, high=1.5, size=2)
beta_save_2m =  np.random.uniform(low=0.5, high=1.5, size=2)
weight_save_3m = 0.33333*np.ones((3), dtype=float) # arbitrary starting values
alpha_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3m = np.random.uniform(low=0.5, high=1.5, size=3) 
weight_save_3mop = 0.33333*np.ones((3), dtype=float) # arbitrary starting values
alpha_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
beta_save_3mop = np.random.uniform(low=0.5, high=1.5, size=3) 
Dnstat1 = 0.10
Dnstat2 = 0.10
Dnstat3 = 0.10

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")
#for jy in range(ijstart, ijend):        
#    for ix in range(0,nxin):
        
for jy in range(ijstart, ijend):        
    for ix in range(0,nxin):
        
        mop_up = False
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        tktr = tktr+1  # number of grid points processed
        print ('***** begin time, current time, jy, ix, nyin, nxin, lon, lat = ',\
            begin_time, current_time, jy, ix, nyin, nxin, lons[jy, ix], lats[jy,ix])
            
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtract off teeny_precip.
        #      NOTE(review): min(|min(precip)|, 0.0) is 0.0 for any finite
        #      data, so as written this subtraction changes nothing --
        #      confirm the intended offset.
        
        precip_ens_1d = precip_tseries[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        if pflag: print ('   tp = ', tp)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]

        # ---- split the samples into the zero fraction and the sorted
        #      positive amounts

        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag: print ('   precip_ens_nonzero[0:-1] = ',\
            precip_ens_nonzero[0:-1])
        fzero[jy,ix] = fraction_zero 
        if nz > 0:
            pmean = np.mean(precip_ens_nonzero)
        else:
            pmean = 0.0
        if pflag:
            print ('   After fraczero_possamps, jy,ix = ', \
                jy,ix, nxin*nyin, current_time)
            print ('   number of samples with positive precip = ', nz)
            print ('   precip_ens_nonzero[-10:] = ', \
                precip_ens_nonzero[-20:])

        if nz > 20 and precip_ens_nonzero[-1] > 2.0:
        
            # --- first fit a single Gamma distribution per Wilks; determine 
            #     the quantiles associated with every 1/20th percentile 
            #     (0.05 to 0.95) with a few extras, if there are enough samples.
            #     With smaller number of samples, do the sampling at a fewer
            #     number of quantiles
        
            query_these_indices, empirical_CDF, empirical_precipvals, \
                pmean, alpha_hat, beta_hat, i1, Dnstat1, \
                weight_save_1m, alpha_save_1m, beta_save_1m = \
                one_parameter_gamma(nz, precip_ens_nonzero, pflag, fraction_zero)
            Dnstat1a[jy,ix] = Dnstat1
            
            # ---- decide if Dn excessive.   If so, then try a 2-component 
            #      Gamma mixture model.  excessive_threshold inspired 
            #      by Wilks textbook, Table 5.2, assume for simplicity alpha=1
            #      since there is only a weak dependence on alpha.
            #      This assignment replaces the value set near the top of
            #      the script for the rest of the run.
            
            excessive_threshold = 0.025
            if pflag: print ('excessive_threshold = ', excessive_threshold)  
            if Dnstat1 > excessive_threshold:
                
                two_parameter_fail, weights, alpha, beta, Dnstat2, \
                    weight_save_2m, alpha_save_2m, beta_save_2m, nmixture = \
                    two_parameter_gamma(jy, ix, precip_ens_nonzero, \
                    empirical_precipvals, empirical_CDF, weights, \
                    alpha, beta, i1, pflag, weight_save_2m, alpha_save_2m, \
                    beta_save_2m, nmixture, fraction_zero, nstride, \
                    Dnstat2, excessive_threshold)
                Dnstat2a[jy,ix] = Dnstat2
                if pflag:
                    print ('   two_parameter_fail, Dnstat2 = ', two_parameter_fail, Dnstat2)
                    
                if two_parameter_fail or Dnstat2 > excessive_threshold:
                
                    # ---- try 3-parameter Gamma
                
                    three_parameter_fail, weights, alpha, beta, Dnstat3,\
                        weight_save_3m, alpha_save_3m, beta_save_3m, nmixture = \
                        three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                        empirical_precipvals, empirical_CDF, weights, \
                        alpha, beta, i1, pflag, weight_save_3m, alpha_save_3m, \
                        beta_save_3m, nmixture, fraction_zero, nstride, mop_up, \
                        Dnstat3, excessive_threshold)
                    Dnstat3a[jy,ix] = Dnstat3
                    
                    if three_parameter_fail or Dnstat3 > excessive_threshold: 

                        # ---- none of the fits met the threshold; keep
                        #      whichever has the lowest Dn
                        
                        weights, alpha, beta, nmixture = decide_which_mixture(\
                            jy, ix, weights, alpha, beta, nmixture, pflag, \
                            Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                            weight_save_1m, alpha_save_2m, beta_save_2m, \
                            weight_save_2m, alpha_save_3m, beta_save_3m, \
                            weight_save_3m)

            else: # ok to use simple single Gamma
            
                weights[0,jy,ix] = 1.0
                alpha[0,jy,ix] = alpha_hat
                beta[0,jy,ix] = beta_hat
                weights[1:,jy,ix] = 0.0
                alpha[1:,jy,ix] = 1.0
                beta[1:,jy,ix] = 1.0
                nmixture[jy,ix] = 1
            
        else: 
            
            # --- very few positive samples, or light precip; fit a simple maximum
            #      likelihood distribution to single mode Gamma distribution
            
            if pflag: print ('   very light precipitation at this grid point. ', pmean)
            if nz < 2:  # not long enough sample to find some nonzero precip amounts
                alpha_hat = 1.0
                beta_hat = 100.0
            else:  # use maximum likelihood method of Wilks text to fit non-mixture distribution.
                # Thom's approximation: D = ln(mean(x)) - mean(ln(x)).
                # The second term must be the MEAN of the logs; the mean
                # of their sum is just the sum and gives a wrong D.
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('   single distribution alpha, beta = ', alpha_hat, beta_hat)
            weights[0,jy,ix] = 1.0
            alpha[0,jy,ix] = alpha_hat
            beta[0,jy,ix] = beta_hat
            weights[1:,jy,ix] = 0.0
            alpha[1:,jy,ix] = 1.0
            beta[1:,jy,ix] = 1.0
            nmixture[jy,ix] = 1
            Dnstat1a[jy,ix] = 0.01  # a fictitious value, but precip so low, it doesn't matter
            

        # ----- sometimes, by using the previous grid point's estimate as a prior mixture, it can
        #       go off into non-sensical parameter space.   Check Dn for this, and if so, initiate 
        #       a final 3-parameter mixture *without* using prior grid point's saved values.

        if Dnstat1a[jy,ix] > excessive_threshold and \
        Dnstat2a[jy,ix] > excessive_threshold and \
        Dnstat3a[jy,ix] > excessive_threshold:
        
            mop_up = True
            print ('   mopping up! ')
            three_parameter_fail, weights, alpha, beta, Dnstat3mop,\
                weight_save_3mop, alpha_save_3mop, beta_save_3mop, nmixture = \
                three_parameter_gamma(jy, ix, precip_ens_nonzero, \
                empirical_precipvals, empirical_CDF, weights, \
                alpha, beta, i1, pflag, weight_save_3mop, alpha_save_3mop, \
                beta_save_3mop, nmixture, fraction_zero, nstride, mop_up, \
                Dnstat3, excessive_threshold)

            # keep the mop-up fit only if it beats the first 3-mixture attempt
            if Dnstat3mop < Dnstat3a[jy,ix]:
                Dnstat3 = Dnstat3mop
                weight_save_3m = weight_save_3mop
                alpha_save_3m = alpha_save_3mop
                beta_save_3m = beta_save_3mop
                                
            weights, alpha, beta, nmixture = decide_which_mixture(\
                jy, ix, weights, alpha, beta, nmixture, pflag, \
                Dnstat1, Dnstat2, Dnstat3, alpha_save_1m, beta_save_1m, \
                weight_save_1m, alpha_save_2m, beta_save_2m, \
                weight_save_2m, alpha_save_3m, beta_save_3m, \
                weight_save_3m)
            
# --- save to cPickle file.  The complete arrays are written, one pickle
#     record per array, in the order listed below; a reader must load
#     them back in the same order.  Only the jy range processed above has
#     been filled in, the remaining rows still hold their initial values.

outfile = master_directory + cmonth+'_'+cdomain+\
    '_MSWEP_apcp_gamma_parameters_h' + cend_hour +\
    '_'+cjstart+'_to_'+cjend+'.cPick'
print ('writing to ', outfile)

# the with-block closes the file even if one of the dumps raises
with open(outfile, 'wb') as ouf:
    for saved_array in (weights, alpha, beta, fzero, \
    Dnstat1a, Dnstat2a, Dnstat3a, nmixture):
        cPickle.dump(saved_array, ouf)



                                                  ._CDF_fitting_mswep_precip_spline.py                                                                000775  000765  000024  00000000472 14016263124 020025  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       :                                      ATTR      :     "                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0     com.macromates.selectionRange      6     com.macromates.visibleIndex  `         ( '94' )289:369284                                                                                                                                                                                                      CDF_fitting_mswep_precip_spline.py                                                                  000775  000765  000024  00000025700 14016263124 017611  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
CDF_fitting_mswep_precip_spline.py cmonth cend_hour

This Python script spline-fits an empirical CDF of precipitation.
The script is tailored to MSWEP precipitation analyses over 
one of the National Digital Forecast Database domains for the National 
Blend of Models.

Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import _pickle as cPickle
import scipy.stats as stats
from gammamix import gammamix_em
import scipy.stats as stats
from scipy.interpolate import LSQUnivariateSpline, splrep, splev

# =====================================================================

def set_domain_boundaries(cdomain):
    
    """ Return (jmin, jmax, imin, imax) for the named domain ('conus',
        'pr' or 'ak').  A grib file of the 2.5-km blend output grid was
        used to determine the bounding lat and lon, and from that, the
        domain bounding indices for the 0.25 GEFSv12 reforecast data
        that will encompass the domain.  Any other domain name prints a
        message and exits the program.
    """
    # (domain name, (jmin, jmax, imin, imax))
    known_domains = (
        ('conus', (93, 246, 368, 686)),
        ('pr', (243, 256, 649, 667)),
        ('ak', (19, 161, 201, 967)),
    )
    for domain_name, bounds in known_domains:
        if cdomain == domain_name:
            return bounds

    print ('invalid domain.  Exiting.')     
    sys.exit()
 
# =====================================================================

def find_nearest(vec, value):
    
    """ Return the index of the element of vec closest to value.  When
    several elements are equally close, the first one is returned. """
    
    distance = np.abs(vec - value)
    return np.argmin(distance)

# =====================================================================

def fraczero_possamps(nsamps, precip_ens):
    
    """
    
    Split the sample vector precip_ens into its zero and positive parts.
    
    Values <= 0.0 mm are censored.  A small uniform random number
    (between -0.01 and 0.01) is then added to each remaining value so
    that the discretized data does not produce lots of tied amounts when
    CDFs are created later; values pushed to <= 0.0 by that perturbation
    are censored as well and count as zeros.
    
    nsamps is accepted but not used; the total sample count is taken
    from len(precip_ens).
    
    Returns (fraction_zero, precip_ens_nonzero, nz): the fraction of
    samples counted as zero, the sorted perturbed positive amounts, and
    how many of them there are.
    
    """
    
    # first censoring pass at 0.0 mm (~(x <= 0) keeps exactly what a
    # "delete where x <= 0" would keep)
    positive = precip_ens[~(precip_ens <= 0.0)]
    
    # perturb to break ties, then censor once more at 0.0 mm
    jitter = np.random.uniform(low=-0.01,high=0.01,size=len(positive))
    perturbed = positive + jitter
    precip_ens_nonzero = np.sort(perturbed[~(perturbed <= 0.0)])
    
    nz = len(precip_ens_nonzero)
    ntotal = len(precip_ens)
    fraction_zero = float(ntotal - nz) / float(ntotal)
    
    return fraction_zero, precip_ens_nonzero, nz
    
# =====================================================================
    
# ---- command-line arguments: month and end hour of the 6-h period

cmonth, cend_hour = sys.argv[1], sys.argv[2]  # e.g. '01' and '06'/'12'/'18'/'00'
imonth = int(cmonth) - 1  # zero-based month index

# ---- fixed settings for this script

nstride = 1
cdomain = 'conus'
excessive_threshold = 0.035
pflag = False # for print statements
master_directory = '/Volumes/Backup Plus/mswep/'

# ---- days per month.
#      NOTE(review): ndaysomo_leap is identical to ndaysomo (February has
#      28 days in both) -- confirm that 29 Feb is meant to be left out.

ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]

# ---- for each month (by zero-based index), the month before and the
#      month after it

cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']
iearly = int(cmonths_early[imonth])-1
ilate = int(cmonths_late[imonth])-1

# ---- total number of daily precipitation samples across all 20 years:
#      this month plus the month on either side of it

nsamps_per_window = []
for imo_window in (imonth, iearly, ilate):
    if imo_window == 1:  # Feb
        nsamps_per_window.append(\
            6*ndaysomo_leap[imo_window] + 14*ndaysomo[imo_window])
    else:
        nsamps_per_window.append(ndaysomo[imo_window]*20)
nsamps_mid, nsamps_early, nsamps_late = nsamps_per_window
nsamps = nsamps_mid + nsamps_early + nsamps_late
print ('nsamps = ', nsamps)

# ---- load the precipitation analyses for this month and the two
#      surrounding months from the monthly netCDF files (2000-2019).
#      Every day of every month read is stacked along one leading sample
#      index of length nsamps, since the date within the month is
#      irrelevant for the distribution fitting.  The storage array and
#      the lat/lon grids are set up on the very first sample, once the
#      grid shape is known.
   
ktr = 0
for iyear in range(2000,2020):
    cyear = str(iyear)
    for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
        imo = int(cmo)-1
        ndays = ndaysomo_leap[imo] if iyear%4 == 0 else ndaysomo[imo]
        infile = master_directory + cyear + cmo + \
            '_on_ndfd_grid_6hourly.nc'
        print (iyear, infile, ndays)
        nc = Dataset(infile)
        yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
        for iday in range(1,ndays+1):
            cday = str(iday).zfill(2)  # two-digit day of month
            iyyyymmddhh = int(str(iyear)+cmo+cday+cend_hour)

            # pick out the record whose period-end time matches this date/hour
            idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
            precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])

            if (iyear, iday) == (2000, 1) and cmo == cmonth:
                nyin, nxin = np.shape(precip_in)
                precip_tseries = np.zeros((nsamps,nyin,nxin), \
                    dtype=np.float64)
                lons = nc.variables['lons'][:,:]
                lats = nc.variables['lats'][:,:]

            precip_tseries[ktr,:,:] = precip_in[:,:]
            ktr += 1
        nc.close()


# ---- loop over the grid points and estimate the spline coefficients 

now = datetime.now()
begin_time = now.strftime("%H:%M:%S")

# ---- output storage.  The builtin float is used as dtype: the np.float
#      alias it replaces was removed in NumPy 1.24 and meant exactly the
#      builtin float.
#      NOTE(review): indices_to_query stores sample indices as float16,
#      which is exact only for integers up to 2048 -- confirm nz can
#      never push an index beyond that.

spline_info = np.zeros((nyin,nxin,2,17), dtype=np.float64) 
spline_info_inv = np.zeros((nyin,nxin,2,17), dtype=np.float64) 
indices_to_query = np.zeros((nyin,nxin,9), dtype=np.float16)
Dnstat = 0.10*np.ones((nyin, nxin), dtype=float)
fzero = np.zeros((nyin, nxin), dtype=float)
cdf_at_indices = np.asarray([ 0.1, 0.25, 0.33333, 0.5, 0.65, 0.8, 0.85, 0.9, 0.95])

for jy in range(0,nyin,nstride):
#for jy in range(0, 1):

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print ('***** begin time, current time, jy, nyin, lat = ',\
        begin_time, current_time, jy, nyin, lats[jy,0])
            
    for ix in range(0,nxin,nstride):
    #for ix in range (492,493):
        
        # ---- there is a grib round-off error that can give negative
        #      values slightly smaller than teeny precip.  to make sure 
        #      that we don't have either negative values or lots of the 
        #      same tiny values, subtractoff teeny_precip
        
        
        precip_ens_1d = precip_tseries[:,jy,ix]
        tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
        teeny_precip = tp*np.ones(nsamps)
        precip_ens_1d = precip_ens_1d - teeny_precip[:]
        fraction_zero, precip_ens_nonzero, nz = \
            fraczero_possamps(nsamps, precip_ens_1d) # return sorted
        if pflag == True: print ('   precip_ens_nonzero[0:-1] = ',\
            precip_ens_nonzero[0:-1])
        fzero[jy,ix] = fraction_zero 
        empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz   
        if nz > 40:
            
            # ---- spline fit the CDF to the precipitation values via 
            #      Michael Scheuerer's hazard function (see Fig 3 in
            #      https://doi.org/10.1175/MWR-D-20-0096.1. )
            
            query_these_indices = [ nz//10, nz//4, nz//3, nz//2, (3*nz)//5, \
                (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20]
            indices_to_query[jy,ix,:] = query_these_indices[:]
            empirical_precipvals = precip_ens_nonzero[query_these_indices]
            hazard_function_empirical = -np.log(1.0-empirical_cdf)    
            spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                xb=0., task=-1, t = empirical_precipvals)   
            spline_hazard = splev(precip_ens_nonzero, spltemp)
            spline_cdf = 1.0 - np.exp(-spline_hazard)
            
            # ---- save spline information to numpy array, 
        
            spline_info[jy,ix,0,:] = spltemp[0]
            spline_info[jy,ix,1,:] = spltemp[1]
            
            # ---- in the subsequent quantile mapping, we will want the
            #      analyzed precipitation amount given the quantile.
            #      Accordingly, let's also reverse the data in the spline
            #      and get the spline fits of precipitation amount to the cdf.
            
            hazard_function_at_indices = -np.log(1.0-cdf_at_indices)
            spltemp_inv = splrep(hazard_function_empirical, precip_ens_nonzero, \
                xb=0., task=-1, t = cdf_at_indices)
            
            spline_info_inv[jy,ix,0,:] = spltemp_inv[0]
            spline_info_inv[jy,ix,1,:] = spltemp_inv[1]   

            # --- evaluate Dn statistic, goodness of fit.
            
            diff = np.abs(empirical_cdf - spline_cdf)
            Dnstat[jy,ix] = np.max(diff) 
        
        else:
            
            # ---- too few samples; fit a single-parameter Gamma using Thom 
            #      estimator described in Wilks textbook, Statistical
            #      Methods in the Atmospheric Sciences.
            
            pmean = np.mean(precip_ens_nonzero)
            lnxbar = np.log(pmean)
            meanlnxi = np.mean(np.log(precip_ens_nonzero))
            D = lnxbar - meanlnxi
            alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
            beta_hat = pmean / alpha_hat
            indices_to_query[jy,ix,:] = -1 # flag for using Gamma
            spline_info[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
            spline_info[jy,ix,1,:] = beta_hat # smoosh into the spline array
            
            # --- evaluate Dn statistic, goodness of fit.
            
            y0 = precip_ens_nonzero / beta_hat
            fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
            diff = np.abs(empirical_cdf - fitted_CDF)
            Dnstat[jy,ix] = np.max(diff) 
            
# --- save to cPickle files.  The objects are dumped back-to-back into each
#     file, so a reader must call load() repeatedly in the same order.
#     The with-blocks guarantee the handles are closed (and buffers flushed)
#     even if one of the dumps raises.

outfile = master_directory + cmonth+'_'+cdomain+\
    '_MSWEP_spline_info_h' + cend_hour + '.cPick'
print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(spline_info, ouf)
    cPickle.dump(spline_info_inv, ouf)
    cPickle.dump(fzero, ouf)
    cPickle.dump(indices_to_query, ouf)

outfile = master_directory + cmonth+'_'+cdomain+\
    '_MSWEP_Dnstat_h' + cend_hour + '.cPick'
print ('writing to ', outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(Dnstat, ouf)
    cPickle.dump(lons, ouf)
    cPickle.dump(lats, ouf)





                                                                ._CDF_fitting_mswep_precip_spline_v2.py                                                             000775  000765  000024  00000000472 14021772147 020442  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       :                                      ATTR      :     "                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0     com.macromates.selectionRange      6     com.macromates.visibleIndex  `         ( '56' )119:433812                                                                                                                                                                                                      CDF_fitting_mswep_precip_spline_v2.py                                                               000775  000765  000024  00000036507 14021772147 020235  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_fitting_mswep_precip_spline_v2(cmonth, cend_hour):

    """
    CDF_fitting_mswep_precip_spline.py cmonth cend_hour
    
    where cmonth = '01' to '12' and cend_hour is '00','06','12', or '18'
    
    this python script is designed to spline fit an empirical CDF of 6-h accumulated
    precipitation. The script is tailored to merged CCPA/MSWEP precipitation  
    analyses over the National Digital Forecast Database CONUS domain for the National 
    Blend of Models

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, mar 2021

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    import scipy.stats as stats
    from scipy.interpolate import splrep, splev
 
    # =====================================================================

    def fraczero_possamps(nsamps, precip_samples):

        """

        Split the 1-d sample vector precip_samples into its dry fraction
        and its sorted wet values.

        Entries <= 0.0 are discarded, a uniform random perturbation in
        [-0.01, 0.01) is added to the remaining ones (the data were
        discretized, and ties are unwanted when building empirical CDFs),
        and anything the perturbation pushed to <= 0.0 is discarded too.

        nsamps is accepted for interface compatibility but is not used; the
        sample count is taken from len(precip_samples).

        Returns (fraction_zero, sorted_positive_samples, nz), where nz is
        the number of positive samples kept and fraction_zero is the share
        of the input that was not kept.

        """

        def _drop_nonpositive(values):
            # censor at 0.0 mm
            return np.delete(values, np.where(values <= 0.0))

        wet = _drop_nonpositive(precip_samples)
        jitter = np.random.uniform(low=-0.01, high=0.01, size=len(wet))
        wet = np.sort(_drop_nonpositive(wet + jitter))

        nz = len(wet)   # number non-zero
        ntotal = len(precip_samples)
        fraction_zero = float(ntotal - nz) / float(ntotal)

        return fraction_zero, wet, nz
    
    # =====================================================================

    # ---- set directories, constants
    
    print ('cmonth, cend_hour = ', cmonth, cend_hour)
    imonth = int(cmonth) - 1   # zero-based index of the central month
    nstride = 1 # do every point
    cdomain = 'conus'
    pflag = False # for print statements
    # NOTE(review): unlike master_directory_out, this prefix carries no
    # trailing '/', and the input file names are formed by plain string
    # concatenation onto it.  Confirm that this matches the on-disk layout.
    master_directory = '/Volumes/NBM/'+cdomain+'_panal'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_panal/CDF_spline/'
    ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    # February has 29 days in leap years (the table previously repeated 28,
    # so 29 February was never read).
    ndaysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
    cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
    cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']
    
    cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        
    yearstart = 2002 # CCPA only available starting 2002
    yearend = 2020 # companion reforecasts end at end of 2019

    # ---- determine the overall number of daily precipitation 
    #      samples across all years for this month and the surrounding
    #      two months.  The count is derived from yearstart/yearend with the
    #      same leap-year test (iyear%4 == 0) that the file-reading loop
    #      uses, so the sample array is sized to exactly the number of days
    #      that loop visits.  (Hard-coded year counts of 18 and 20 were mixed
    #      here before, which left never-filled, all-zero rows in the sample
    #      array and so inflated the fraction of zero precipitation.)

    iearly = int(cmonths_early[imonth])-1
    ilate = int(cmonths_late[imonth])-1

    nyears = yearend - yearstart
    nleap = sum(1 for iyear in range(yearstart, yearend) if iyear%4 == 0)
    nsamps_mid, nsamps_early, nsamps_late = [ \
        nleap*ndaysomo_leap[imo] + (nyears - nleap)*ndaysomo[imo] \
        for imo in (imonth, iearly, ilate) ]
    nsamps = nsamps_mid + nsamps_early + nsamps_late
    print ('nsamps = ', nsamps)

    # ---- read in the previously generated netCDF file with precipitation
    #      for this month and lead time as well as the surrounding
    #      two months.  All dates for this month have
    #      been smushed into one leading index, dimension nsamps,
    #      since the date of the forecast within the month and 
    #      the member number is irrelevant for the distribution 
    #      fitting.
   
    # ktr counts the samples stored so far; it is the leading index into
    # precip_tseries.
    ktr = 0
    for iyear in range(yearstart, yearend):
    
        # --- loop over the month in question and the surrounding 2 months
    
        for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
            imo = int(cmo)-1
            if iyear%4 == 0:
                ndays = ndaysomo_leap[imo]
            else:
                ndays = ndaysomo[imo]
            cyear = str(iyear)    
            infile = master_directory + cyear + cmo + \
                '_mswep_on_ndfd_grid_6hourly.nc'
                
            print (infile)
            nc = Dataset(infile)
            try:
                yyyymmddhh_end = nc.variables['yyyymmddhh_end'][:]
                for iday in range(1,ndays+1):
                    cday = str(iday).zfill(2)
                    iyyyymmddhh = int(cyear+cmo+cday+cend_hour)
                    if pflag:
                        # debugging aid only; dumping the whole time axis
                        # for every day swamps the log
                        print (yyyymmddhh_end)
                        print (iyyyymmddhh)
                    idx = np.where(yyyymmddhh_end == iyyyymmddhh)[0]
                    if len(idx) != 1:
                        # without this check a missing or duplicated date
                        # only surfaces later as an opaque shape error
                        raise ValueError('expected exactly one record ending at ' + \
                            str(iyyyymmddhh) + ' in ' + infile + ', found ' + \
                            str(len(idx)))
                    precip_in = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
                    if ktr == 0:
                        # very first sample: the grid dimensions are now
                        # known, so allocate storage and read the lat/lons.
                        # Keyed on ktr rather than a literal year so that a
                        # change of yearstart cannot skip the allocation.
                        nlats_ndfd, nlons_ndfd = np.shape(precip_in)
                        precip_tseries = np.zeros((nsamps,nlats_ndfd,nlons_ndfd), \
                            dtype=np.float64)
                        missingv = -99.99*np.ones((nlats_ndfd, nlons_ndfd), \
                            dtype=np.float64)
                        lons = nc.variables['lons'][:,:]
                        lats = nc.variables['lats'][:,:]
                    # values of 500 or more are replaced by the -99.99 flag.
                    # NOTE(review): fraczero_possamps discards everything
                    # <= 0.0, so flagged samples are subsequently counted
                    # as zero precipitation -- confirm this is intended.
                    precip_in = np.where(precip_in < 500., precip_in, missingv)
                    precip_tseries[ktr,:,:] = precip_in[:,:]
                    ktr = ktr+1
            finally:
                # always release the file handle, also when a read fails
                nc.close()

    # ---- loop over the grid points and estimate the inverse spline coefficients 

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")

    # [:,:,0,:] holds spline knots and [:,:,1,:] spline coefficients, i.e.
    # the first two elements of the tuple returned by splrep.  For
    # Gamma-fitted points spline_info_inv instead carries alpha (index 0)
    # and beta (index 1), repeated along the last axis.
    spline_info = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    spline_info_inv = np.zeros((nlats_ndfd,nlons_ndfd,2,17), dtype=np.float64)
    usegamma = np.zeros((nlats_ndfd,nlons_ndfd), dtype=np.int32) 
        # flag for whether to use Gamma fit (1) or spline (0) or missing data (-1)
    # np.float was an alias of the builtin float (float64) that NumPy 1.24
    # removed; name the dtype explicitly.
    fzero = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64)
    cdf_at_indices = np.asarray([ 0.1, 0.25, 0.33333, 0.5, 0.65, 0.8, 0.85, 0.9, 0.95])
        # quantiles associated with the interior spline knots

    print ('******** COMPUTING SPLINE COEFFICIENTS (wetter) or GAMMA PARAMETERS (dry) *********')
    for jy in range(0, nlats_ndfd, nstride):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        if jy%10 == 0: print ('   ***** begin time, current time, jy, nlats_ndfd, lat = ',\
            begin_time, current_time, jy, nlats_ndfd, lats[jy,0])
            
        for ix in range(0,nlons_ndfd, nstride):
        
            if pflag: print ('******* ',jy,ix)
            precip_samples_1d = precip_tseries[:,jy,ix]
            
            # take this grid point's sample, calc fraction_zero, and return
            # the number of nonzero (nz) samples and their sorted values
            fraction_zero, precip_samples_nonzero, nz = \
                fraczero_possamps(nsamps, precip_samples_1d) # return sorted
            if pflag and nz > 0 : print ('   precip_samples_nonzero[0:-1] = ',\
                precip_samples_nonzero[0:-1])
            if pflag: print ('   nz = ',nz)
            
            if nz > 40:
            
                # ---- spline fit the CDF to the precipitation values via 
                #      Michael Scheuerer's hazard function (see Fig 3 in
                #      https://doi.org/10.1175/MWR-D-20-0096.1. )
            
                usegamma[jy,ix] = 0 # a flag to use spline inverse
                fzero[jy,ix] = fraction_zero 
                
                # plotting positions (i + 0.5)/nz of the sorted wet samples,
                # and the cumulative hazard -ln(1 - F) derived from them
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)  
                
                # ---- set where spline knots are: the sample values at
                #      fixed ranks, weighted toward the upper tail.
                query_these_indices = [ nz//10, nz//4, nz//3, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20]
                empirical_precipvals = precip_samples_nonzero[query_these_indices]
            
                # ---- in the subsequent quantile mapping, we will want the
                #      analyzed precipitation amount given the quantile.
                #      Accordingly, let's reverse the data in the spline
                #      and get the spline fits of precipitation amount 
                #      (y) to the cumulative hazard (x).
                #
                #      NOTE(review): the abscissa here is the cumulative
                #      hazard, yet the interior knots are the raw quantiles
                #      in cdf_at_indices; -np.log(1.0 - cdf_at_indices)
                #      would place them at the matching hazard values.
                #      Left unchanged because the stored knots define the
                #      output consumed downstream -- confirm the intent.
            
                spltemp_inv = splrep(hazard_function_empirical, precip_samples_nonzero, \
                    xb=0., task=-1, t = cdf_at_indices)
                spline_info_inv[jy,ix,0,:] = spltemp_inv[0]
                spline_info_inv[jy,ix,1,:] = spltemp_inv[1]
                
                # ---- spline fit to anal CDF with the focus on knots at higher quantiles.
                #      these data are saved not for quantile mapping but for diagnostics.

                spltemp = splrep(precip_samples_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)   
                spline_info[jy,ix,0,:] = spltemp[0]
                spline_info[jy,ix,1,:] = spltemp[1]
            
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
                
                usegamma[jy,ix] = -1            
                spline_info_inv[jy,ix,0,:] = -99.99
                spline_info_inv[jy,ix,1,:] = -99.99
                fzero[jy,ix] = -99.99

            else:
            
                # ---- too few samples to use splines; fit a single-parameter Gamma 
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.

                usegamma[jy,ix] = 1            
                fzero[jy,ix] = fraction_zero 
                pmean = np.mean(precip_samples_nonzero)
                lnxbar = np.log(pmean)
                if pflag: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_samples_nonzero))
                if pflag: print ('meanlnxi = ', meanlnxi)
                # D = ln(mean) - mean(ln); shape alpha from D, scale beta
                # so that alpha*beta reproduces the sample mean
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_inv[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_inv[jy,ix,1,:] = beta_hat # smoosh into the spline array   

    # ---- save to netCDF file
    #      One file per month and ending hour: the output directory, the
    #      three-letter month name, the domain and the ending hour make up
    #      the file name.

    outfile = master_directory_out + cmonths[imonth]+'_'+cdomain+\
        '_MSWEP_spline_info_h' + cend_hour + 'UTC.nc' 
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    # ---- dimensions, each with a same-named integer index variable:
    #      xf / yf span the grid, xspd the 17 spline entries, and x2
    #      selects between the two rows stored per grid point.

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    # ---- 2-d longitude / latitude of every grid point

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    # ---- data variables, stored as compressed 32-bit floats although the
    #      in-memory arrays are float64.
    #      NOTE(review): the fitting loop marks missing points with -99.99
    #      (spline_info_inv, fzero) and -1 (usegamma), whereas the
    #      missing_value attributes declared below are -9999.99 and -99.
    #      Readers relying on the attribute will not recognise the flags
    #      actually written -- confirm which convention is intended.

    spline_info_inv_out = ncout.createVariable('spline_info_inv',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_inv_out.units = "n/a"
    spline_info_inv_out.long_name = \
        "Information for computing quantile-mapped precipitation from "+\
        "spline inverse (or Gamma CDF for dry points).   When given a "+\
        "forecast quantile, this will predict the analyzed precipitation amt. "+\
        "x2=0 is for knots, x2=1 for spline coefficients.  Splines used "+\
        "only if there are sufficient samples, > 40.   If the sample size is "+\
        "between 10 and 40, fit a Gamma distribution instead, and insert the "+\
        "alpha and beta parameters into this variable, alpha in x2=0, "+\
        "beta in x2=1.  If less than 10 samples, don't try to do anything."
    spline_info_inv_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)
          
    spline_info_out = ncout.createVariable('spline_info',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_out.units = "n/a"
    spline_info_out.long_name = \
        "Information for computing precipitation from spline "+\
        "Diagnostic, and only valid at points that are reasonably moist."
    spline_info_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    # NOTE(review): the long_name below does not mention the value -1 that
    # the fitting loop stores for points with too few samples.
    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF data structures.
    #      The index variables simply hold 0..n-1.

    xvf[:] = range(nlons_ndfd)
    yvf[:] = range(nlats_ndfd)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    lonsa[:] = lons[:,:]
    latsa[:] = lats[:,:]
    spline_info_inv_out[:] = spline_info_inv[:,:,:,:]
    spline_info_out[:] = spline_info[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma[:,:]
    
    # ---- close the netCDF file

    ncout.close()


    istat = 0
    return istat                                                                                                                                                                                         ._CDF_spline_fitting_forecast_precip.py                                                             000775  000765  000024  00000000475 14016263126 020505  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       =                                      ATTR      =     %                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   3     com.macromates.selectionRange      9     com.macromates.visibleIndex  `    7     ( '474:5' )277:468951                                                                                                                                                                                                   CDF_spline_fitting_forecast_precip.py                                                               000775  000765  000024  00000045002 14016263126 020263  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_spline_fitting_forecast_precip(cmonth, clead, cdomain):

    """
    CDF_spline_fitting_forecast_precip.py cmonth clead cdomain

    fit cubic spline to cumulative hazard function of precipitation and save
    for fitting of precipitation CDFs.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import LSQUnivariateSpline, splrep, splev
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    from mpl_toolkits.basemap import Basemap, interp
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # =====================================================================

    def set_domain_boundaries(cdomain):
    
        """ return the bounding indices (jmin, jmax, imin, imax) of the 
            0.25 GEFSv12 reforecast grid that encompass the requested 
            domain: 'conus', 'pr', or 'ak'.  The values were determined
            from a grib file of the 2.5-km blend output grid.  Any other 
            domain name prints a message and exits.
        """
        known_domains = (
            ('conus', (93, 246, 368, 686)),
            ('pr', (243, 256, 649, 667)),
            ('ak', (19, 161, 201, 967)),
        )
        for domain_name, bounds in known_domains:
            if cdomain == domain_name:
                jmin, jmax, imin, imax = bounds
                return jmin, jmax, imin, imax

        print ('invalid domain.  Exiting.')     
        sys.exit()    

    # =====================================================================

    def get_surrounding_months(cmonth):

        """ given a three-letter month name ('Jan' ... 'Dec'), return the 
            names of the preceding and the following month, wrapping 
            around the year end.  Any other input prints a message and 
            exits.
        """
        month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        if cmonth not in month_names:
            print ('invalid month')
            sys.exit()

        k = month_names.index(cmonth)
        cmonth_early = month_names[k-1]   # index -1 wraps Jan back to Dec
        cmonth_late = month_names[(k+1) % 12]
    
        return cmonth_early, cmonth_late

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):
    
        """
    
        Split the 1-d sample vector precip_ens into its dry fraction and
        its sorted wet values.

        Entries <= 0.0 are discarded, a uniform random perturbation in
        [-0.1, 0.1) is added to the remaining ones (the data were
        discretized, and ties are unwanted when building empirical CDFs),
        and anything the perturbation pushed to <= 0.0 is discarded too.

        nsamps is accepted for interface compatibility but is not used; the
        sample count is taken from len(precip_ens).

        Returns (fraction_zero, sorted_positive_samples, nz), where nz is
        the number of positive samples kept and fraction_zero is the share
        of the input that was not kept.
    
        """

        def _drop_nonpositive(values):
            # censor at 0.0 mm
            return np.delete(values, np.where(values <= 0.0))

        wet = _drop_nonpositive(precip_ens)
        jitter = np.random.uniform(low=-0.1, high=0.1, size=len(wet))
        wet = np.sort(_drop_nonpositive(wet + jitter))

        nz = len(wet)
        ntotal = len(precip_ens)
        fraction_zero = float(ntotal - nz) / float(ntotal)
        return fraction_zero, wet, nz

    # =====================================================================
    
    # ---- inputs (formerly read from the command line, now passed in as
    #      arguments): cmonth is 'Jan', 'Feb', etc.; clead is the lead
    #      time string, e.g. 03, 06, 12; cdomain is conus, etc.

    cmonth_list = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    # two-digit month numbers, parallel to cmonth_list.  (The list used to
    # contain '03' twice, which made it 13 long and shifted every month
    # from April onward by one.)
    cmonthnum = ['01','02','03','04','05','06','07','08','09','10','11','12']
    print ('cmonth = ', cmonth)
    print (cmonth_list)
    imonth_index = cmonth_list.index(cmonth)   # raises ValueError for an unknown name

    # ---- set parameters

    jmin, jmax, imin, imax = set_domain_boundaries(cdomain)
    nstride = 1
    cmonth_early, cmonth_late = get_surrounding_months(cmonth)
    pflag = False # for print statements
    master_directory = '/Volumes/NBM/'+cdomain+'_gefsv12/precip/netcdf/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_gefsv12/CDF_spline/'
    nmembers = 5

    # ---- read the previously generated netCDF files of forecast 
    #      precipitation for this lead time: the month in question first,
    #      then the month before and the month after.  Only the sub-grid
    #      jmin:jmax, imin:imax is kept.  Within each file the leading 
    #      index is the sample dimension, whose length is recorded in
    #      nsamps_middle / nsamps_early / nsamps_late.

    month_fields = []
    for slot, month_name in enumerate((cmonth, cmonth_early, cmonth_late)):
        ncfile = master_directory + month_name + '_apcp_sfc_h' + clead + '.nc'
        print (ncfile)
        nc = Dataset(ncfile)
        month_fields.append(nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax])
        if slot == 0:
            # the coordinate vectors are taken from the central month's file
            lons_1d = nc.variables['lons_fcst'][imin:imax]
            lats_1d = nc.variables['lats_fcst'][jmin:jmax]
        nc.close()

    precip_middle, precip_early, precip_late = month_fields
    nsamps_middle, ny_gefsv12, nx_gefsv12 = np.shape(precip_middle)
    nsamps_early, ny_gefsv12, nx_gefsv12 = np.shape(precip_early)
    nsamps_late, ny_gefsv12, nx_gefsv12 = np.shape(precip_late)

    nsamps = nsamps_middle + nsamps_early + nsamps_late


    # ---- more initialization of output storage arrays now that 
    #      we know the array dimensions.  np.float was an alias of the
    #      builtin float (float64) that NumPy 1.24 removed, so the dtype
    #      is named explicitly.

    fzero = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.float64)
    Dnstat = 0.10*np.ones((ny_gefsv12, nx_gefsv12), dtype=np.float64)
   
    # ---- storage and timing for the loop over grid points that follows.
    #      NOTE(review): an earlier comment here described fitting mixtures 
    #      of Gamma distributions, while the arrays below are sized for 
    #      spline knots / coefficients -- confirm which description applies.

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")
    plotit = False
 
    # unlike the analysis fitting, the grid dimensions are trailing here:
    # (17 spline entries, 2 rows, ny, nx)
    spline_info_fcst = np.zeros((17,2,ny_gefsv12,nx_gefsv12), dtype=np.float64) 
    usegamma_fcst = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.int32)
    # NOTE(review): both arrays below are half precision; float16 cannot
    # represent every integer above 2048 and keeps only about three
    # significant digits -- confirm that is adequate for their use.
    indices_to_query_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.float16)
    empirical_precipvals_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.float16)
                   
    for jy in range(0,ny_gefsv12,nstride):
    #for jy in range(ny_gefsv12//2, ny_gefsv12//2+1):
        
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('***** begin time, current time, jy, ny_gefsv12, lat = ',\
            begin_time, current_time, jy, ny_gefsv12, lats_1d[jy])        
        
        for ix in range(0,nx_gefsv12,nstride):
        #for ix in range (nx_gefsv12//2, nx_gefsv12//2+1):
            
            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip.  to make sure 
            #      that we don't have either negative values or lots of the 
            #      same tiny values, subtractoff teeny_precip
        
            precip_ens_1d_middle = precip_middle[:,jy,ix]
            precip_ens_1d_early = precip_early[:,jy,ix]
            precip_ens_1d_late = precip_late[:,jy,ix]
            precip_ens_1d = np.concatenate((precip_ens_1d_middle, \
                precip_ens_1d_early, precip_ens_1d_late))
        
            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps)
            precip_ens_1d = precip_ens_1d - teeny_precip[:]

            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps, precip_ens_1d) # return sorted
            fzero[jy,ix] = fraction_zero 
            #print ('fraction_zero = ', fraction_zero)
            #print ('nz = ', nz)
        
            if nz > 40:

                # ---- spline fit with the focus on knots at higher quantiles

                query_these_indices = [ nz//10, nz//4, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20, (39*nz)//40]
                indices_to_query_fcst[:,jy,ix] = query_these_indices[:]
                empirical_precipvals = precip_ens_nonzero[query_these_indices]
                empirical_precipvals_fcst[:,jy,ix] = empirical_precipvals[:]
                #print ('empirical_precipvals = ', empirical_precipvals)
        
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz      
                hazard_function_empirical = -np.log(1.0-empirical_cdf)
                spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)   
                spline_hazard = splev(precip_ens_nonzero, spltemp)
                spline_cdf = 1.0 - np.exp(-spline_hazard)       
                diff = np.abs(empirical_cdf - spline_cdf)
        
                # ---- save spline information to numpy array
        
                spline_info_fcst[:,0,jy,ix] = spltemp[0]
                spline_info_fcst[:,1,jy,ix] = spltemp[1]
                Dnstat[jy,ix] = np.max(diff)
        
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
            
                spline_info_fcst[:,0,jy,ix] = -99.99
                spline_info_fcst[:,1,jy,ix] = -99.99
                indices_to_query_fcst[:,jy,ix] = -99 
                usegamma_fcst[jy,ix] = 0
                Dnstat[jy,ix] = 0.0      

            else:
            
                # ---- too few samples to use splines; fit a single-parameter Gamma 
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.
            
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz

                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag == True: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag == True: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                indices_to_query_fcst[:,jy,ix] = -1 # flag for using Gamma
                spline_info_fcst[:,0,jy,ix] = alpha_hat  # smoosh into the spline array
                spline_info_fcst[:,1,jy,ix] = beta_hat # smoosh into the spline array
                usegamma_fcst[jy,ix] = 1
            
                # --- evaluate Dn statistic, goodness of fit.
            
                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
                diff = np.abs(empirical_cdf - fitted_CDF)
                Dnstat[jy,ix] = np.max(diff)
        

            if plotit == True:
            
                # ---- plot the CDFs, forecast and analyzed, empirical and best fitted.

                f = plt.figure(figsize=(6.5,6.5))#
                ax = f.add_axes([.13,.14,.84,.75])
                title = 'Spline fit of hazard function, jy, ix = '+str(jy)+','+str(ix)
                ax.set_title(title,fontsize=13)
                ax.plot(precip_ens_nonzero, spline_hazard,color='Blue',\
                    lw=2,label='Cubic spline')
                ax.plot(precip_ens_nonzero, hazard_function_empirical,\
                    color='Red',lw=2,label='Empirical')
                plt.ylabel('Hazard function',fontsize=11)
                ax.legend(loc=0)
                ax.set_ylim(-0.2,6.0)
                plt.grid(True,lw=0.25,color='LightGray')
                ax.set_xlim(0,40)
                #ax.set_xlim(0,5)
                ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=11)
                figname = 'hazard_function_precip_example_spline'+str(jy)+'.png'
                plt.savefig(figname,dpi=400)
                print ('Plot done', figname)
                plt.close()

            
                f = plt.figure(figsize=(6.5,6.5))#
                ax = f.add_axes([.13,.14,.84,.75])
                title = 'Spline fit of CDF via hazard function, jy, ix = '+\
                    str(jy)+','+str(ix)
                ax.set_title(title,fontsize=13)
                ax.plot(precip_ens_nonzero, spline_cdf,color='Blue',\
                    lw=2,label='Cubic spline')
                ax.plot(precip_ens_nonzero, empirical_cdf,color='Red',\
                    lw=2,label='Empirical')
                plt.ylabel('Non-exceedance probability',fontsize=11)
                ax.legend(loc=0)
                ax.set_ylim(0.,1)
                plt.grid(True,lw=0.25,color='LightGray')
                ax.set_xlim(0,40)
                #ax.set_xlim(0,5)
                ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=11)
                figname = 'CDF_precip_example_spline'+str(jy)+'.png'
                plt.savefig(figname,dpi=400)
                print ('Plot done', figname)
                plt.close() 
                #sys.exit()

           
    # --- save to cPickle files

    #outfile = master_directory + cmonth+'_'+cdomain+\
    #    '_GEFSv12_spline_info_h' + clead + '.cPick'
    #print ('writing to ', outfile)
    #ouf = open(outfile, 'wb')
    #cPickle.dump(spline_info, ouf)
    #cPickle.dump(fzero, ouf)
    #cPickle.dump(indices_to_query, ouf)
    #cPickle.dump(empirical_precipvals_array, ouf)
    #ouf.close()

    print ('max Dnstat = ', np.max(Dnstat))
    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_Dnstat_h' + clead + '.cPick'
    print ('writing to ', outfile)
    ouf = open(outfile, 'wb')
    cPickle.dump(Dnstat, ouf)
    cPickle.dump(lons_1d, ouf)
    cPickle.dump(lats_1d, ouf)
    ouf.close()

    # ---- save to netCDF file

    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_spline_info_h' + clead + '.nc' 
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nx_gefsv12)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',ny_gefsv12)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    x9 = ncout.createDimension('x9',9)
    x9f = ncout.createVariable('x9','i4',('x9',))
    x9f.long_name = "first dimension of nonzero_indices_of_knots"
    x9f.units = "n/a"

    lonsa = ncout.createVariable('lons','f4',('xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    spline_info_out = ncout.createVariable('spline_info',\
        'f4',('xspd','x2','yf','xf',),
        zlib=True,least_significant_digit=6)
    spline_info_out.units = "n/a"
    spline_info_out.long_name = \
        "Information for computing precipitation from"+\
        "spline (or Gamma CDF for dry points)"
    spline_info_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)
    
    indices_to_query_out = ncout.createVariable('nonzero_indices_of_knots',\
        'i4',('x9','yf','xf',),
        zlib=True,least_significant_digit=6)
    indices_to_query_out.units = "n/a"
    indices_to_query_out.long_name = \
        "non-zero samples, indices of knots"
    indices_to_query_out.missing_value = \
        np.array(-99,dtype=np.int32)    

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF structures.

    xvf[:] = range(nx_gefsv12)
    yvf[:] = range(ny_gefsv12)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    x9f[:] = range(9)
    lonsa[:] = lons_1d[:]
    latsa[:] = lats_1d[:]
    spline_info_out[:] = spline_info_fcst[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma_fcst[:,:]
    indices_to_query_out[:] = indices_to_query_fcst[:,:,:] 

    ncout.close()




                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              ._CDF_spline_fitting_forecast_precip_mean.py                                                        000775  000765  000024  00000000476 14016263126 021506  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       >                                      ATTR      >     &                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   3     com.macromates.selectionRange      9     com.macromates.visibleIndex  `    n     ( '486:5' )231:4811267                                                                                                                                                                                                  CDF_spline_fitting_forecast_precip_mean.py                                                          000775  000765  000024  00000046410 14016263126 021267  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        def CDF_spline_fitting_forecast_precip_mean(cmonth, clead, cdomain):

    """
    CDF_spline_fitting_forecast_precip_mean.py cmonth clead cdomain

    Fit spline to CDF and save info using cumulative hazard function.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import LSQUnivariateSpline, splrep, splev
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    from mpl_toolkits.basemap import Basemap, interp
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # =====================================================================

    def set_domain_boundaries(cdomain):

        """ look up the index window (jmin, jmax, imin, imax) for the named
            domain ('conus', 'pr', or 'ak').  Per the original note, a grib
            file of the 2.5-km blend output grid was used to determine the
            bounding lat and lon, and from that the bounding indices of the
            0.25 GEFSv12 reforecast data that encompass the domain.
            Any other domain name prints a message and exits.
        """
        # ---- (jmin, jmax, imin, imax) for each supported domain name
        domain_windows = {
            'conus': (93, 246, 368, 686),
            'pr': (243, 256, 649, 667),
            'ak': (19, 161, 201, 967),
        }

        window = domain_windows.get(cdomain)
        if window is None:
            print ('invalid domain.  Exiting.')
            sys.exit()

        jmin, jmax, imin, imax = window
        return jmin, jmax, imin, imax

    # =====================================================================

    def get_surrounding_months(cmonth):

        """ return (cmonth_early, cmonth_late), the three-letter names of the
            months immediately before and after cmonth.  The calendar wraps,
            so 'Jan' is preceded by 'Dec' and 'Dec' is followed by 'Jan'.
            Any value other than 'Jan' ... 'Dec' prints a message and exits.
        """
        calendar = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

        if cmonth not in calendar:
            print ('invalid month')
            sys.exit()

        position = calendar.index(cmonth)
        cmonth_early = calendar[position - 1]  # index -1 wraps Jan back to Dec
        cmonth_late = calendar[(position + 1) % len(calendar)]
        return cmonth_early, cmonth_late

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):

        """
        split the 1-d sample precip_ens into its zero and positive parts.

        Values <= 0.0 are censored.  Each remaining value then has a uniform
        random number in [-0.1, 0.1) added; the original note gives the
        reason as the data being discretized to ~0.1 mm, so that later
        empirical CDFs do not have lots of tied amounts.  Anything the noise
        pushes to <= 0.0 is censored as well.

        nsamps is accepted but not used; the sample size is len(precip_ens).

        Returns (fraction_zero, precip_ens_nonzero, nz): the fraction of
        precip_ens that was censored, the surviving amounts sorted in
        ascending order, and how many of them there are.
        """

        # ---- first censoring pass at 0.0 mm
        wet_samples = precip_ens[np.logical_not(precip_ens <= 0.0)]

        # ---- perturb the positive amounts, then censor at 0.0 mm again
        jitter = np.random.uniform(low=-0.1,high=0.1,size=len(wet_samples))
        wet_samples = wet_samples + jitter
        wet_samples = wet_samples[np.logical_not(wet_samples <= 0.0)]
        nz = len(wet_samples)

        precip_ens_nonzero = np.sort(wet_samples)

        # ---- everything that did not survive counts as zero
        ntotal = len(precip_ens)
        fraction_zero = float(ntotal - nz) / float(ntotal)
        return fraction_zero, precip_ens_nonzero, nz

    # =====================================================================
    
    # ---- inputs.  These used to be read from the command line; they are 
    #      now the function arguments:
    #      cmonth = 'Jan', 'Feb', etc.
    #      clead = 03, 06, 12, etc.
    #      cdomain = conus, etc.

    cmonth_list = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

    # ---- two-digit month numbers, parallel to cmonth_list.  The list must
    #      have exactly one entry per month: it is indexed with imonth_index 
    #      to build the output file names, so a repeated entry would give
    #      two different months the same output file.

    cmonthnum = ['01','02','03','04','05','06','07','08','09','10','11','12']
    print ('cmonth = ', cmonth)
    print (cmonth_list)
    imonth_index = cmonth_list.index(cmonth) # ValueError if cmonth is not in the list

    # ---- set parameters

    jmin, jmax, imin, imax = set_domain_boundaries(cdomain)
    nstride = 1 # step of the grid-point loops; 1 visits every point
    cmonth_early, cmonth_late = get_surrounding_months(cmonth)
    pflag = False # for print statements
    master_directory = '/Volumes/NBM/'+cdomain+'_gefsv12/precip/netcdf/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_gefsv12/CDF_spline/'
    nmembers = 5 # presumably the number of ensemble members per case - confirm

    # ---- read in the previously generated netCDF files with precipitation
    #      for this month, the month before, and the month after, at this
    #      lead time.  All members, dates for a month have been smushed 
    #      into one leading index, since the date of the forecast within 
    #      the month and the member number is irrelevant for the 
    #      distribution fitting.  The files are opened in "with" blocks so 
    #      they are closed even if a variable is missing or a read fails.
   
    ncfile = master_directory + cmonth + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_middle = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
        lons_1d = nc.variables['lons_fcst'][imin:imax]
        lats_1d = nc.variables['lats_fcst'][jmin:jmax]
    nsamps_middle, ny_gefsv12, nx_gefsv12 = np.shape(precip_middle)

    ncfile = master_directory + cmonth_early + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_early = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_early, ny_gefsv12, nx_gefsv12 = np.shape(precip_early)

    ncfile = master_directory + cmonth_late + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_late = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_late, ny_gefsv12, nx_gefsv12 = np.shape(precip_late)

    # ---- total samples of all members, and the number of member-mean 
    #      samples that remain after averaging

    nsamps = nsamps_middle + nsamps_early + nsamps_late
    nsamps_mean = nsamps//nmembers
    
    # ---- the first dimension of precip arrays are cases x members.  
    #      average over all the members, i.e., over each consecutive
    #      group of nmembers samples.

    def average_over_members(precip_members, nsamps_members):

        """ return a float64 array (nsamps_members//nmembers, ny, nx) holding 
            the mean of each consecutive group of nmembers samples along
            the first axis of precip_members.
        """
        precip_mean = np.zeros((nsamps_members//nmembers, \
            ny_gefsv12, nx_gefsv12), dtype=np.float64)
        for i in range(0, nsamps_members, nmembers):
            precip_mean[i//nmembers,:,:] = \
                np.mean(precip_members[i:i+nmembers,:,:], axis=0)
        return precip_mean

    precip_middle_mean = average_over_members(precip_middle, nsamps_middle)
    precip_early_mean = average_over_members(precip_early, nsamps_early)
    precip_late_mean = average_over_members(precip_late, nsamps_late)

    # ---- more initialization of output storage arrays now that 
    #      we know the array dimensions.  np.float64 is spelled out
    #      because the np.float alias no longer exists in current NumPy.

    fzero = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.float64)
    Dnstat = 0.10*np.ones((ny_gefsv12, nx_gefsv12), dtype=np.float64)
   
    # ---- the grid-point loop below fits the CDF of nonzero precipitation 
    #      at each point: a spline of the cumulative hazard function where 
    #      there are enough nonzero samples, a Gamma distribution where 
    #      there are fewer, and a missing-data flag where there are too few.

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")
    plotit = False # set True to save diagnostic plots of the fits
 
    # ---- spline_info_fcst holds, per grid point, 17 values in each of
    #      2 slots (spline knots and coefficients, or Gamma parameters).
    #      indices_to_query_fcst holds integer sample indices and flags,
    #      so it is stored as int32; it is written to an 'i4' netCDF 
    #      variable, and a 16-bit float cannot hold every index above 2048.

    spline_info_fcst = np.zeros((17,2,ny_gefsv12,nx_gefsv12), dtype=np.float64) 
    usegamma_fcst = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.int32)
    indices_to_query_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.int32)
    empirical_precipvals_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.float16)
                   
    def plot_fit_vs_empirical(xvals, fitted, empirical, title, ylabel, \
        ylimits, figname):

        """ plot a fitted curve (blue) and the empirical curve (red) against 
            the precipitation amounts xvals, and save the figure to figname.
        """
        f = plt.figure(figsize=(6.5,6.5))#
        ax = f.add_axes([.13,.14,.84,.75])
        ax.set_title(title,fontsize=13)
        ax.plot(xvals, fitted,color='Blue',lw=2,label='Cubic spline')
        ax.plot(xvals, empirical,color='Red',lw=2,label='Empirical')
        plt.ylabel(ylabel,fontsize=11)
        ax.legend(loc=0)
        ax.set_ylim(ylimits[0],ylimits[1])
        plt.grid(True,lw=0.25,color='LightGray')
        ax.set_xlim(0,40)
        ax.set_xlabel('6-hourly total precipitation (mm)',fontsize=11)
        plt.savefig(figname,dpi=400)
        print ('Plot done', figname)
        plt.close()

    # ---- loop over the grid points.  At each one, pool the member-mean
    #      samples of the three months and fit the CDF of the nonzero 
    #      amounts: spline of the cumulative hazard function if nz > 40,
    #      Gamma distribution if 10 <= nz <= 40, missing flag if nz < 10.

    for jy in range(0,ny_gefsv12,nstride):
        
        # ---- progress report every 20th row

        if jy%20 == 0: 
            current_time = datetime.now().strftime("%H:%M:%S")
            print ('***** begin time, current time, jy, ny_gefsv12, lat = ',\
                begin_time, current_time, jy, ny_gefsv12, lats_1d[jy])        
        
        for ix in range(0,nx_gefsv12,nstride):
            
            precip_ens_1d = np.concatenate((precip_middle_mean[:,jy,ix], \
                precip_early_mean[:,jy,ix], precip_late_mean[:,jy,ix]))
        
            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip.  to make sure 
            #      that we don't have either negative values or lots of the 
            #      same tiny values, subtract off teeny_precip.
            #      NOTE(review): np.abs(...) is never negative, so the
            #      minimum of it and 0.0 is 0.0 (for non-NaN data) and the
            #      subtraction leaves the sample unchanged.  Kept as is so 
            #      the fitted values do not change; confirm what offset 
            #      was intended.
        
            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps_mean)
            precip_ens_1d = precip_ens_1d - teeny_precip[:]

            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps_mean, precip_ens_1d) # return sorted
            fzero[jy,ix] = fraction_zero 
        
            if nz > 40:

                # ---- spline fit with the focus on knots at higher quantiles.
                #      The knots are the sorted nonzero amounts at these
                #      fixed fractions of the way through the sample.

                query_these_indices = [ nz//10, nz//4, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (19*nz)//20, (39*nz)//40]
                indices_to_query_fcst[:,jy,ix] = query_these_indices[:]
                empirical_precipvals = precip_ens_nonzero[query_these_indices]
                empirical_precipvals_fcst[:,jy,ix] = empirical_precipvals[:]
        
                # ---- fit the spline to the cumulative hazard function,
                #      -ln(1-CDF), then convert the fit back to a CDF

                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz      
                hazard_function_empirical = -np.log(1.0-empirical_cdf)
                spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)   
                spline_hazard = splev(precip_ens_nonzero, spltemp)
                spline_cdf = 1.0 - np.exp(-spline_hazard)       
                diff = np.abs(empirical_cdf - spline_cdf)
        
                # ---- save spline information (knots, then coefficients)
                #      to numpy array
        
                spline_info_fcst[:,0,jy,ix] = spltemp[0]
                spline_info_fcst[:,1,jy,ix] = spltemp[1]
                Dnstat[jy,ix] = np.max(diff)
        
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
            
                spline_info_fcst[:,0,jy,ix] = -99.99
                spline_info_fcst[:,1,jy,ix] = -99.99
                indices_to_query_fcst[:,jy,ix] = -99 
                usegamma_fcst[jy,ix] = 0
                Dnstat[jy,ix] = 0.0      

            else:
            
                # ---- too few samples to use splines; fit a Gamma distribution
                #      (shape alpha_hat, scale beta_hat) using the Thom (1958) 
                #      estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.
            
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz

                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                indices_to_query_fcst[:,jy,ix] = -1 # flag for using Gamma
                spline_info_fcst[:,0,jy,ix] = alpha_hat  # smoosh into the spline array
                spline_info_fcst[:,1,jy,ix] = beta_hat # smoosh into the spline array
                usegamma_fcst[jy,ix] = 1
            
                # --- evaluate Dn statistic, goodness of fit.
            
                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
                diff = np.abs(empirical_cdf - fitted_CDF)
                Dnstat[jy,ix] = np.max(diff)
        

            if plotit and nz > 40:
            
                # ---- plot the spline fit against the empirical curve, first
                #      for the hazard function and then for the CDF.  Only
                #      done for spline points: the spline curves do not
                #      exist for Gamma or missing-data points.
                #      NOTE(review): the file names carry jy but not ix, so
                #      plots for the same row overwrite one another.

                plot_fit_vs_empirical(precip_ens_nonzero, spline_hazard, \
                    hazard_function_empirical, \
                    'Spline fit of hazard function, jy, ix = '+str(jy)+','+str(ix), \
                    'Hazard function', (-0.2,6.0), \
                    'hazard_function_precip_example_spline'+str(jy)+'.png')

                plot_fit_vs_empirical(precip_ens_nonzero, spline_cdf, \
                    empirical_cdf, \
                    'Spline fit of CDF via hazard function, jy, ix = '+\
                    str(jy)+','+str(ix), \
                    'Non-exceedance probability', (0.,1), \
                    'CDF_precip_example_spline'+str(jy)+'.png')

           
    # --- save the goodness-of-fit statistic and the grid lons/lats to a
    #     cPickle file.  The "with" block closes the file even if a dump fails.

    print ('max Dnstat = ', np.max(Dnstat))
    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_mean_Dnstat_h' + clead + '.cPick'
    print ('writing to ', outfile)
    with open(outfile, 'wb') as ouf:
        cPickle.dump(Dnstat, ouf)
        cPickle.dump(lons_1d, ouf)
        cPickle.dump(lats_1d, ouf)

    # ---- save the fit information to netCDF file.  The "with" block 
    #      closes the file even if defining or writing a variable fails.

    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_mean_spline_info_h' + clead + '.nc' 
    print ('writing to ',outfile)
    with Dataset(outfile,'w',format='NETCDF4_CLASSIC') as ncout:

        # ---- dimensions and their index variables

        ncout.createDimension('xf',nx_gefsv12)
        xvf = ncout.createVariable('xf','i4',('xf',))
        xvf.long_name = "eastward grid point number on NDFD grid"
        xvf.units = "n/a"

        ncout.createDimension('yf',ny_gefsv12)
        yvf = ncout.createVariable('yf','i4',('yf',))
        yvf.long_name = "northward grid point number on NDFD grid"
        yvf.units = "n/a"

        ncout.createDimension('xspd',17)
        xspdf = ncout.createVariable('xspd','i4',('xspd',))
        xspdf.long_name = "index for spline dimension"
        xspdf.units = "n/a"

        ncout.createDimension('x2',2)
        x2f = ncout.createVariable('x2','i4',('x2',))
        x2f.long_name = "2nd index for spline dimension"
        x2f.units = "n/a"

        ncout.createDimension('x9',9)
        x9f = ncout.createVariable('x9','i4',('x9',))
        x9f.long_name = "first dimension of nonzero_indices_of_knots"
        x9f.units = "n/a"

        # ---- grid coordinates

        lonsa = ncout.createVariable('lons','f4',('xf',))
        lonsa.long_name = "longitude"
        lonsa.units = "degrees_east"

        latsa = ncout.createVariable('lats','f4',('yf',))
        latsa.long_name = "latitude"
        latsa.units = "degrees_north"

        # ---- fitted quantities.
        #      NOTE(review): the missing_value attribute of spline_info is
        #      -9999.99, but the fitting loop flags missing points in the
        #      array with -99.99; confirm which value readers should test.

        spline_info_out = ncout.createVariable('spline_info',\
            'f4',('xspd','x2','yf','xf',),
            zlib=True,least_significant_digit=6)
        spline_info_out.units = "n/a"
        spline_info_out.long_name = \
            "Information for computing precipitation from "+\
            "spline (or Gamma CDF for dry points)"
        spline_info_out.missing_value = \
            np.array(-9999.99,dtype=np.float32)
    
        indices_to_query_out = ncout.createVariable('nonzero_indices_of_knots',\
            'i4',('x9','yf','xf',),
            zlib=True,least_significant_digit=6)
        indices_to_query_out.units = "n/a"
        indices_to_query_out.long_name = \
            "non-zero samples, indices of knots"
        indices_to_query_out.missing_value = \
            np.array(-99,dtype=np.int32)    

        fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
            zlib=True,least_significant_digit=6)
        fzero_out.units = "n/a"
        fzero_out.long_name = "fraction_zero"
        fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

        usegamma_out = ncout.createVariable('usegamma',\
            'i4',('yf','xf',), zlib=True)
        usegamma_out.units = "n/a"
        usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
        usegamma_out.missing_value = np.array(-99,dtype=np.int32)

        # ---- metadata

        ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters for 5-mem mean"
        ncout.history = "from CDF fitting code by Tom Hamill, PSL"
        ncout.institution =  "psl.noaa.gov"
        ncout.platform = "n/a"
        ncout.references = "n/a"
    
        # ---- copy the outputs to netCDF structures.

        xvf[:] = range(nx_gefsv12)
        yvf[:] = range(ny_gefsv12)
        xspdf[:] = range(17)
        x2f[:] = range(2)
        x9f[:] = range(9)
        lonsa[:] = lons_1d[:]
        latsa[:] = lats_1d[:]
        spline_info_out[:] = spline_info_fcst[:,:,:,:]
        fzero_out[:] = fzero[:,:]
        usegamma_out[:] = usegamma_fcst[:,:]
        indices_to_query_out[:] = indices_to_query_fcst[:,:,:] 




                                                                                                                                                                                                                                                        ._CDF_spline_fitting_forecast_precip_v2.py                                                          000775  000765  000024  00000000474 14024752621 021115  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  
     <                                      ATTR      <     $                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   3     com.macromates.selectionRange      8     com.macromates.visibleIndex  `         ( '409:5' )260:98967                                                                                                                                                                                                    CDF_spline_fitting_forecast_precip_v2.py                                                            000775  000765  000024  00000037545 14024752621 020711  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_spline_fitting_forecast_precip_v2(cmonth, clead, cdomain):

    """
    CDF_spline_fitting_forecast_precip_v2.py cmonth clead cdomain
    
    where cmonth = 'Jan', 'Feb' etc
    clead = 

    Fit a cubic spline to the cumulative hazard function of precipitation and
    save the spline parameters for later fitting of precipitation CDFs.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import LSQUnivariateSpline, splrep, splev
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    from mpl_toolkits.basemap import Basemap, interp
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # =====================================================================

    def set_domain_boundaries(cdomain):

        """ Return the index window (jmin, jmax, imin, imax) for a named domain.

            The bounds were derived from a grib file of the 2.5-km blend
            output grid: its bounding lat/lon were converted to the bounding
            indices of the 0.25 GEFSv12 reforecast data that encompass the
            domain.  Recognized domains are 'conus', 'pr' and 'ak'; anything
            else prints a message and terminates via sys.exit().
        """
        # each entry: (domain name, (jmin, jmax, imin, imax))
        known_domains = (
            ('conus', (93, 246, 368, 686)),
            ('pr', (243, 256, 649, 667)),
            ('ak', (19, 161, 201, 967)),
        )
        for domain_name, index_window in known_domains:
            if cdomain == domain_name:
                return index_window

        print ('invalid domain.  Exiting.')
        sys.exit()

    # =====================================================================

    def get_surrounding_months(cmonth):

        """ Return (previous month, next month) for a 3-letter month name.

            The year is treated as a cycle, so 'Jan' is preceded by 'Dec' and
            'Dec' is followed by 'Jan'.  An unrecognized name prints a message
            and terminates via sys.exit().
        """
        month_cycle = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        nmonths = len(month_cycle)

        for position, month_name in enumerate(month_cycle):
            if cmonth == month_name:
                # index -1 wraps to 'Dec'; the modulo wraps 'Dec' back to 'Jan'
                cmonth_early = month_cycle[position - 1]
                cmonth_late = month_cycle[(position + 1) % nmonths]
                return cmonth_early, cmonth_late

        print ('invalid month')
        sys.exit()

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):

        """
        Split a 1-D precipitation sample into its zero fraction and its
        sorted positive amounts.

        Samples <= 0.0 mm are discarded.  Because the data were discretized
        to ~0.1 mm, a uniform random perturbation in [-0.1, 0.1) is added to
        the remaining samples so that later empirical CDFs do not contain
        many tied amounts; any sample pushed to <= 0.0 by the perturbation
        is discarded as well and therefore counts as a zero.

        nsamps is accepted for interface compatibility but is not used; the
        sample size is taken from len(precip_ens).

        Returns (fraction_zero, precip_ens_nonzero, nz): the fraction of
        samples counted as zero, the sorted positive samples, and how many
        positive samples there are.
        """
        ntotal = len(precip_ens)

        # --- first censoring at 0.0 mm
        dry_locations = np.where(precip_ens <= 0.0)
        wet_samples = np.delete(precip_ens, dry_locations)

        # --- break ties caused by the discretization of the input data
        jitter = np.random.uniform(low=-0.1, high=0.1, size=len(wet_samples))
        wet_samples = wet_samples + jitter

        # --- second censoring: the jitter can push small amounts to <= 0.0
        wet_samples = np.delete(wet_samples, np.where(wet_samples <= 0.0))
        nz = len(wet_samples)

        precip_ens_nonzero = np.sort(wet_samples)
        fraction_zero = float(ntotal - nz) / float(ntotal)
        return fraction_zero, precip_ens_nonzero, nz

    # =====================================================================
    
    # ---- set parameters

    cmonth_list = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    # two-digit month numbers, parallel to cmonth_list (exactly one entry per
    # month).  A duplicated '03' entry here previously shifted every month
    # from Apr onward by one, so e.g. the Apr Dnstat file was written under
    # the '03' (Mar) name.
    cmonthnum = ['01','02','03','04','05','06','07','08','09','10','11','12']
    print ('cmonth = ', cmonth)
    print (cmonth_list)
    imonth_index = cmonth_list.index(cmonth)  # raises ValueError for an unknown month
    jmin, jmax, imin, imax = set_domain_boundaries(cdomain)
    nstride = 1  # step between processed grid points in both directions
    cmonth_early, cmonth_late = get_surrounding_months(cmonth)
    pflag = False # for print statements
    master_directory = '/Volumes/NBM/'+cdomain+'_gefsv12/precip/netcdf/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_gefsv12/CDF_spline/'
    nmembers = 5

    # ---- read in the previously generated netCDF files with precipitation
    #      for this month and the two surrounding months at this lead time.
    #      All members, dates for a month have been smushed into one leading
    #      index, dimension nsamps, since the date of the forecast within the
    #      month and the member number is irrelevant for the distribution
    #      fitting.  Each file is opened in a "with" block so the handle is
    #      closed even if one of the reads raises.

    ncfile = master_directory + cmonth + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_middle = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
        lons_1d = nc.variables['lons_fcst'][imin:imax]
        lats_1d = nc.variables['lats_fcst'][jmin:jmax]
    nsamps_middle, ny_gefsv12, nx_gefsv12 = np.shape(precip_middle)

    ncfile = master_directory + cmonth_early + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_early = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_early, ny_gefsv12, nx_gefsv12 = np.shape(precip_early)

    ncfile = master_directory + cmonth_late + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_late = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_late, ny_gefsv12, nx_gefsv12 = np.shape(precip_late)

    # total training sample size per grid point across the three months
    nsamps = nsamps_middle + nsamps_early + nsamps_late

    # ---- more initialization of output storage arrays now that
    #      we know the array dimensions

    # np.float was only an alias of the builtin float (float64) and was
    # removed in NumPy 1.24; np.float64 yields the same dtype everywhere.
    fzero = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.float64)
    Dnstat = 0.10*np.ones((ny_gefsv12, nx_gefsv12), dtype=np.float64)

    # ---- loop over the grid points.  Depending on the number of positive
    #      samples nz at a point, either
    #        nz > 50        : fit a cubic spline to the empirical cumulative
    #                         hazard function,
    #        10 <= nz <= 50 : fit a Gamma distribution (Thom estimator),
    #        nz < 10        : flag the point as having insufficient data.
    #      Dnstat holds the max abs difference between empirical and fitted CDF.

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")
    plotit = False

    # [jy,ix,0,:] holds the spline knots (or Gamma alpha), [jy,ix,1,:] the
    # spline coefficients (or Gamma beta).  17 matches the length of the
    # knot/coefficient vectors returned by splrep for 9 interior knots and
    # the default cubic degree (9 + 2*4).
    spline_info_fcst = np.zeros((ny_gefsv12,nx_gefsv12,2,17), dtype=np.float64)
    usegamma_fcst = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.int32)
    empirical_precipvals_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.float16)
    q98 = np.zeros((ny_gefsv12,nx_gefsv12), dtype=np.float32)

    for jy in range(0,ny_gefsv12,nstride):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('***** begin time, current time, jy, ny_gefsv12, lat = ',\
            begin_time, current_time, jy, ny_gefsv12, lats_1d[jy])

        for ix in range(0,nx_gefsv12,nstride):

            # ---- pool the samples from the three months at this grid point

            precip_ens_1d_middle = precip_middle[:,jy,ix]
            precip_ens_1d_early = precip_early[:,jy,ix]
            precip_ens_1d_late = precip_late[:,jy,ix]
            precip_ens_1d = np.concatenate((precip_ens_1d_middle, \
                precip_ens_1d_early, precip_ens_1d_late))

            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip; the intent is
            #      to subtract teeny_precip to avoid negative values or lots
            #      of identical tiny values.
            #      NOTE(review): min(|min(precip)|, 0.0) is always 0.0, so
            #      this subtraction is currently a no-op.  Behavior is kept
            #      as is; confirm the intended offset before changing it.

            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps)
            precip_ens_1d = precip_ens_1d - teeny_precip[:]

            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps, precip_ens_1d) # return sorted
            fzero[jy,ix] = fraction_zero

            if nz > 50:

                # ---- spline fit with the focus on knots at higher quantiles
                #      (10, 25, 50, 60, 80, 85, 90, 96 and 98 percent of the
                #      sorted positive samples)

                usegamma_fcst[jy,ix] = 0
                query_these_indices = [ nz//10, nz//4, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (24*nz)//25, (49*nz)//50]
                empirical_precipvals = precip_ens_nonzero[query_these_indices]
                empirical_precipvals_fcst[:,jy,ix] = empirical_precipvals[:]
                q98[jy,ix] = empirical_precipvals[-1]  # last knot = 98th percentile

                # empirical CDF at plotting positions (i + 0.5)/nz, then the
                # cumulative hazard H = -ln(1 - F), which is what gets splined
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)
                # task=-1: least-squares spline with the interior knots given by t
                spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)
                spline_hazard = splev(precip_ens_nonzero, spltemp)
                spline_cdf = 1.0 - np.exp(-spline_hazard)
                diff = np.abs(empirical_cdf - spline_cdf)

                # ---- save spline information (knots, coefficients) to numpy array

                spline_info_fcst[jy,ix,0,:] = spltemp[0]
                spline_info_fcst[jy,ix,1,:] = spltemp[1]
                Dnstat[jy,ix] = np.max(diff)

            elif nz < 10:

                # --- over the whole training sample, there were not enough
                #     nonzero precip values recorded.   Flag this as missing data.

                usegamma_fcst[jy,ix] = -1  # flag as insufficient data to do any qmapping
                spline_info_fcst[jy,ix,0,:] = -99.99
                spline_info_fcst[jy,ix,1,:] = -99.99
                Dnstat[jy,ix] = 0.0
                q98[jy,ix] = -99.99

            else:

                # ---- too few samples to use splines; fit a Gamma distribution
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.

                usegamma_fcst[jy,ix] = 1  # yes, estimate with Gamma distribution
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_fcst[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_fcst[jy,ix,1,:] = beta_hat # smoosh into the spline array
                # with so few samples the largest positive value is stored
                q98[jy,ix] = precip_ens_nonzero[-1]

                # --- evaluate Dn statistic, goodness of fit.

                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
                diff = np.abs(empirical_cdf - fitted_CDF)
                Dnstat[jy,ix] = np.max(diff)
           
    # --- save Dnstat goodness of fit to cPickled file, followed by the
    #     longitude and latitude vectors (three consecutive pickle records).

    print ('max Dnstat = ', np.max(Dnstat))
    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_Dnstat_h' + clead + '.cPick'
    print ('writing to ', outfile)
    # "with" guarantees the file is flushed and closed even if a dump fails
    with open(outfile, 'wb') as ouf:
        cPickle.dump(Dnstat, ouf)
        cPickle.dump(lons_1d, ouf)
        cPickle.dump(lats_1d, ouf)

    # ---- save spline information to netCDF file.  The file name uses the
    #      3-letter month name (cmonth_list), not the 2-digit month number
    #      used by the earlier, commented-out variant below.

    #outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
    #    '_GEFSv12_spline_info_h' + clead + '.nc' 
        
    outfile = master_directory_out + cmonth_list[imonth_index]+'_'+cdomain+\
        '_GEFSv12_spline_info_h' + clead + '.nc'
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    # ---- dimensions and their integer coordinate variables.
    #      NOTE(review): the long_names say "NDFD grid", but the dimensions
    #      are sized by nx_gefsv12 / ny_gefsv12 -- confirm the wording.

    xf = ncout.createDimension('xf',nx_gefsv12)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',ny_gefsv12)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    # length of the last axis of spline_info_fcst
    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    # length of the third axis of spline_info_fcst
    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    # NOTE(review): no data variable written below uses the 'x9' dimension;
    # only its coordinate variable is filled.
    x9 = ncout.createDimension('x9',9)
    x9f = ncout.createVariable('x9','i4',('x9',))
    x9f.long_name = "first dimension of nonzero_indices_of_knots"
    x9f.units = "n/a"

    # ---- 1-D longitude / latitude of the subset grid

    lonsa = ncout.createVariable('lons','f4',('xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    # ---- data variables.
    #      NOTE(review): missing_value is declared as -9999.99 here, while
    #      the fitting loop fills insufficient-data points with -99.99.

    spline_info_out = ncout.createVariable('spline_info',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_out.units = "n/a"
    spline_info_out.long_name = \
        "Information for computing precipitation from"+\
        "spline (or Gamma CDF for dry points)"
    spline_info_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)   

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    # NOTE(review): usegamma_fcst is also set to -1 (insufficient data) by
    # the fitting loop, which the long_name does not mention.
    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)
    
    # stored as f8 although q98 is float32 and missing_value is a float32
    quantile_98 = ncout.createVariable('quantile_98',\
        'f8',('yf','xf',), zlib=True)
    quantile_98.units = "mm"
    quantile_98.long_name = "98th percentile of precipitation CDF"
    quantile_98.missing_value = np.array(-99.99,dtype=np.float32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF structures.

    xvf[:] = range(nx_gefsv12)
    yvf[:] = range(ny_gefsv12)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    x9f[:] = range(9)
    lonsa[:] = lons_1d[:]
    latsa[:] = lats_1d[:]
    spline_info_out[:] = spline_info_fcst[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma_fcst[:,:]
    quantile_98[:] = q98[:,:]

    # closing flushes everything to disk
    ncout.close()




                                                                                                                                                           ._CDF_spline_fitting_forecast_precip_v3.py                                                          000664  000765  000024  00000000476 14070405546 021117  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       >                                      ATTR      >     &                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   3     com.macromates.selectionRange      9     com.macromates.visibleIndex  `    4j     ( '413:5' )253:6410163                                                                                                                                                                                                  CDF_spline_fitting_forecast_precip_v3.py                                                            000664  000765  000024  00000040176 14070405546 020703  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_spline_fitting_forecast_precip_v3(cmonth, clead, cdomain):

    """
    CDF_spline_fitting_forecast_precip_v3.py cmonth clead cdomain
    
    where cmonth = 'Jan', 'Feb' etc
    clead = 

    Fit a cubic spline to the cumulative hazard function of precipitation and
    save the spline parameters for later fitting of precipitation CDFs.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import LSQUnivariateSpline, splrep, splev
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    from mpl_toolkits.basemap import Basemap, interp
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # =====================================================================

    def set_domain_boundaries(cdomain):

        """ Return the index window (jmin, jmax, imin, imax) for a named domain.

            The bounds were derived from a grib file of the 2.5-km blend
            output grid: its bounding lat/lon were converted to the bounding
            indices of the 0.25 GEFSv12 reforecast data that encompass the
            domain.  Recognized domains are 'conus', 'pr' and 'ak'; anything
            else prints a message and terminates via sys.exit().
        """
        # each entry: (domain name, (jmin, jmax, imin, imax))
        known_domains = (
            ('conus', (93, 246, 368, 686)),
            ('pr', (243, 256, 649, 667)),
            ('ak', (19, 161, 201, 967)),
        )
        for domain_name, index_window in known_domains:
            if cdomain == domain_name:
                return index_window

        print ('invalid domain.  Exiting.')
        sys.exit()

    # =====================================================================

    def get_surrounding_months(cmonth):

        """ Return (previous month, next month) for a 3-letter month name.

            The year is treated as a cycle, so 'Jan' is preceded by 'Dec' and
            'Dec' is followed by 'Jan'.  An unrecognized name prints a message
            and terminates via sys.exit().
        """
        month_cycle = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        nmonths = len(month_cycle)

        for position, month_name in enumerate(month_cycle):
            if cmonth == month_name:
                # index -1 wraps to 'Dec'; the modulo wraps 'Dec' back to 'Jan'
                cmonth_early = month_cycle[position - 1]
                cmonth_late = month_cycle[(position + 1) % nmonths]
                return cmonth_early, cmonth_late

        print ('invalid month')
        sys.exit()

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):

        """
        Split a 1-D precipitation sample into its zero fraction and its
        sorted positive amounts.

        Samples <= 0.0 mm are discarded.  Because the data were discretized
        to ~0.1 mm, a uniform random perturbation in [-0.1, 0.1) is added to
        the remaining samples so that later empirical CDFs do not contain
        many tied amounts; any sample pushed to <= 0.0 by the perturbation
        is discarded as well and therefore counts as a zero.

        nsamps is accepted for interface compatibility but is not used; the
        sample size is taken from len(precip_ens).

        Returns (fraction_zero, precip_ens_nonzero, nz): the fraction of
        samples counted as zero, the sorted positive samples, and how many
        positive samples there are.
        """
        ntotal = len(precip_ens)

        # --- first censoring at 0.0 mm
        dry_locations = np.where(precip_ens <= 0.0)
        wet_samples = np.delete(precip_ens, dry_locations)

        # --- break ties caused by the discretization of the input data
        jitter = np.random.uniform(low=-0.1, high=0.1, size=len(wet_samples))
        wet_samples = wet_samples + jitter

        # --- second censoring: the jitter can push small amounts to <= 0.0
        wet_samples = np.delete(wet_samples, np.where(wet_samples <= 0.0))
        nz = len(wet_samples)

        precip_ens_nonzero = np.sort(wet_samples)
        fraction_zero = float(ntotal - nz) / float(ntotal)
        return fraction_zero, precip_ens_nonzero, nz

    # =====================================================================
    
    # ---- set parameters

    cmonth_list = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    # two-digit month numbers, parallel to cmonth_list (exactly one entry per
    # month).  A duplicated '03' entry here previously shifted every month
    # from Apr onward by one, so e.g. the Apr Dnstat file was written under
    # the '03' (Mar) name.
    cmonthnum = ['01','02','03','04','05','06','07','08','09','10','11','12']
    print ('cmonth = ', cmonth)
    print (cmonth_list)
    imonth_index = cmonth_list.index(cmonth)  # raises ValueError for an unknown month
    jmin, jmax, imin, imax = set_domain_boundaries(cdomain)
    nstride = 1  # step between processed grid points in both directions
    cmonth_early, cmonth_late = get_surrounding_months(cmonth)
    pflag = False # for print statements
    master_directory = '/Volumes/NBM/'+cdomain+'_gefsv12/precip/netcdf/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_gefsv12/CDF_spline/'
    nmembers = 5

    # ---- read in the previously generated netCDF files with precipitation
    #      for this month and the two surrounding months at this lead time.
    #      All members, dates for a month have been smushed into one leading
    #      index, dimension nsamps, since the date of the forecast within the
    #      month and the member number is irrelevant for the distribution
    #      fitting.  Each file is opened in a "with" block so the handle is
    #      closed even if one of the reads raises.

    ncfile = master_directory + cmonth + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_middle = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
        lons_1d = nc.variables['lons_fcst'][imin:imax]
        lats_1d = nc.variables['lats_fcst'][jmin:jmax]
    nsamps_middle, ny_gefsv12, nx_gefsv12 = np.shape(precip_middle)

    ncfile = master_directory + cmonth_early + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_early = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_early, ny_gefsv12, nx_gefsv12 = np.shape(precip_early)

    ncfile = master_directory + cmonth_late + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_late = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_late, ny_gefsv12, nx_gefsv12 = np.shape(precip_late)

    # total training sample size per grid point across the three months
    nsamps = nsamps_middle + nsamps_early + nsamps_late

    # ---- more initialization of output storage arrays now that
    #      we know the array dimensions

    # np.float was only an alias of the builtin float (float64) and was
    # removed in NumPy 1.24; np.float64 yields the same dtype everywhere.
    fzero = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.float64)
    Dnstat = 0.10*np.ones((ny_gefsv12, nx_gefsv12), dtype=np.float64)

    # ---- loop over the grid points.  Depending on the number of positive
    #      samples nz at a point, either
    #        nz > 100        : fit a cubic spline to the empirical cumulative
    #                          hazard function,
    #        10 <= nz <= 100 : fit a Gamma distribution (Thom estimator),
    #        nz < 10         : flag the point as having insufficient data.
    #      Dnstat holds the max abs difference between empirical and fitted CDF.

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")
    plotit = False

    # [jy,ix,0,:] holds the spline knots (or Gamma alpha), [jy,ix,1,:] the
    # spline coefficients (or Gamma beta).  17 matches the length of the
    # knot/coefficient vectors returned by splrep for 9 interior knots and
    # the default cubic degree (9 + 2*4).
    spline_info_fcst = np.zeros((ny_gefsv12,nx_gefsv12,2,17), dtype=np.float64)
    usegamma_fcst = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.int32)
    empirical_precipvals_fcst = np.zeros((9,ny_gefsv12,nx_gefsv12), dtype=np.float16)
    q99 = np.zeros((ny_gefsv12,nx_gefsv12), dtype=np.float32)

    for jy in range(0,ny_gefsv12,nstride):

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('***** begin time, current time, jy, ny_gefsv12, lat = ',\
            begin_time, current_time, jy, ny_gefsv12, lats_1d[jy])

        for ix in range(0,nx_gefsv12,nstride):

            # ---- pool the samples from the three months at this grid point

            precip_ens_1d_middle = precip_middle[:,jy,ix]
            precip_ens_1d_early = precip_early[:,jy,ix]
            precip_ens_1d_late = precip_late[:,jy,ix]
            precip_ens_1d = np.concatenate((precip_ens_1d_middle, \
                precip_ens_1d_early, precip_ens_1d_late))

            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip; the intent is
            #      to subtract teeny_precip to avoid negative values or lots
            #      of identical tiny values.
            #      NOTE(review): min(|min(precip)|, 0.0) is always 0.0, so
            #      this subtraction is currently a no-op.  Behavior is kept
            #      as is; confirm the intended offset before changing it.

            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps)
            precip_ens_1d = precip_ens_1d - teeny_precip[:]

            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps, precip_ens_1d) # return sorted
            fzero[jy,ix] = fraction_zero

            if nz > 100:

                # ---- spline fit with the focus on knots at higher quantiles
                #      (10, 25, 50, 60, 80, 85, 90, 96 and 99 percent of the
                #      sorted positive samples)

                usegamma_fcst[jy,ix] = 0
                query_these_indices = [ nz//10, nz//4, nz//2, (3*nz)//5, \
                    (4*nz)//5, (17*nz)//20, (9*nz)//10, (24*nz)//25, (99*nz)//100]
                empirical_precipvals = precip_ens_nonzero[query_these_indices]
                empirical_precipvals_fcst[:,jy,ix] = empirical_precipvals[:]
                # The 99th-percentile amount must be read from the full sorted
                # sample.  empirical_precipvals has only 9 entries, so indexing
                # it with (99*nz)//100 (>= 99 here) raised IndexError.  The
                # value equals the last knot, empirical_precipvals[-1].
                q99[jy,ix] = precip_ens_nonzero[(99*nz)//100]

                # empirical CDF at plotting positions (i + 0.5)/nz, then the
                # cumulative hazard H = -ln(1 - F), which is what gets splined
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)
                # task=-1: least-squares spline with the interior knots given by t
                spltemp = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)
                spline_hazard = splev(precip_ens_nonzero, spltemp)
                spline_cdf = 1.0 - np.exp(-spline_hazard)
                diff = np.abs(empirical_cdf - spline_cdf)

                # ---- save spline information (knots, coefficients) to numpy array

                spline_info_fcst[jy,ix,0,:] = spltemp[0]
                spline_info_fcst[jy,ix,1,:] = spltemp[1]
                Dnstat[jy,ix] = np.max(diff)

            elif nz < 10:

                # --- over the whole training sample, there were not enough
                #     nonzero precip values recorded.   Flag this as missing data.

                usegamma_fcst[jy,ix] = -1  # flag as insufficient data to do any qmapping
                spline_info_fcst[jy,ix,0,:] = -99.99
                spline_info_fcst[jy,ix,1,:] = -99.99
                Dnstat[jy,ix] = 0.0
                q99[jy,ix] = -99.99

            else:

                # ---- too few samples to use splines; fit a Gamma distribution
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.

                usegamma_fcst[jy,ix] = 1  # yes, estimate with Gamma distribution
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag: print ('meanlnxi = ', meanlnxi)
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_fcst[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_fcst[jy,ix,1,:] = beta_hat # smoosh into the spline array
                # with so few samples the largest positive value is stored
                q99[jy,ix] = precip_ens_nonzero[-1]

                # --- evaluate Dn statistic, goodness of fit.

                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
                diff = np.abs(empirical_cdf - fitted_CDF)
                Dnstat[jy,ix] = np.max(diff)
           
    # --- save Dnstat goodness of fit to cPickled file

    print ('max Dnstat = ', np.max(Dnstat))
    outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
        '_GEFSv12_Dnstat_h' + clead + '.cPick'
    print ('writing to ', outfile)
    ouf = open(outfile, 'wb')
    cPickle.dump(Dnstat, ouf)
    cPickle.dump(lons_1d, ouf)
    cPickle.dump(lats_1d, ouf)
    ouf.close()

    # ---- save spline information to netCDF file

    #outfile = master_directory_out + cmonthnum[imonth_index]+'_'+cdomain+\
    #    '_GEFSv12_spline_info_h' + clead + '.nc' 
        
    outfile = master_directory_out + cmonth_list[imonth_index]+'_'+cdomain+\
        '_GEFSv12_spline_info_h' + clead + '.nc'
    print ('writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nx_gefsv12)
    xvf = ncout.createVariable('xf','i4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',ny_gefsv12)
    yvf = ncout.createVariable('yf','i4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    xspd = ncout.createDimension('xspd',17)
    xspdf = ncout.createVariable('xspd','i4',('xspd',))
    xspdf.long_name = "index for spline dimension"
    xspdf.units = "n/a"

    x2 = ncout.createDimension('x2',2)
    x2f = ncout.createVariable('x2','i4',('x2',))
    x2f.long_name = "2nd index for spline dimension"
    x2f.units = "n/a"

    x9 = ncout.createDimension('x9',9)
    x9f = ncout.createVariable('x9','i4',('x9',))
    x9f.long_name = "first dimension of nonzero_indices_of_knots"
    x9f.units = "n/a"

    lonsa = ncout.createVariable('lons','f4',('xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"

    spline_info_out = ncout.createVariable('spline_info',\
        'f4',('yf','xf','x2','xspd'),
        zlib=True,least_significant_digit=6)
    spline_info_out.units = "n/a"
    spline_info_out.long_name = \
        "Information for computing precipitation from"+\
        "spline (or Gamma CDF for dry points)"
    spline_info_out.missing_value = \
        np.array(-9999.99,dtype=np.float32)   

    fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
        zlib=True,least_significant_digit=6)
    fzero_out.units = "n/a"
    fzero_out.long_name = "fraction_zero"
    fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

    usegamma_out = ncout.createVariable('usegamma',\
        'i4',('yf','xf',), zlib=True)
    usegamma_out.units = "n/a"
    usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
    usegamma_out.missing_value = np.array(-99,dtype=np.int32)
    
    quantile_99 = ncout.createVariable('quantile_99',\
        'f8',('yf','xf',), zlib=True)
    quantile_99.units = "mm"
    quantile_99.long_name = "99th percentile of precipitation CDF"
    quantile_99.missing_value = np.array(-99.99,dtype=np.float32)

    # ---- metadata

    ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
    ncout.history = "from CDF fitting code by Tom Hamill, PSL"
    ncout.institution =  "psl.noaa.gov"
    ncout.platform = "n/a"
    ncout.references = "n/a"
    
    # ---- copy the outputs to netCDF structures.

    xvf[:] = range(nx_gefsv12)
    yvf[:] = range(ny_gefsv12)
    xspdf[:] = range(17)
    x2f[:] = range(2)
    x9f[:] = range(9)
    lonsa[:] = lons_1d[:]
    latsa[:] = lats_1d[:]
    spline_info_out[:] = spline_info_fcst[:,:,:,:]
    fzero_out[:] = fzero[:,:]
    usegamma_out[:] = usegamma_fcst[:,:]
    quantile_99[:] = q99[:,:]

    ncout.close()




                                                                                                                                                                                                                                                                                                                                                                                                  ._CDF_spline_fitting_forecast_precip_v4.py                                                          000644  000765  000024  00000000472 14073570375 021120  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       :                                      ATTR      :     "                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   3     com.macromates.selectionRange      9     com.macromates.visibleIndex  `    4j     ( '456:5' )235:630                                                                                                                                                                                                      CDF_spline_fitting_forecast_precip_v4.py                                                            000644  000765  000024  00000043627 14073570375 020714  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain):

    """
    CDF_spline_fitting_forecast_precip_v4.py cmonth clead cdomain
    
    where cmonth = 'Jan', 'Feb' etc
    clead = '024' or similar (3 digits) and cdomain typically is "conus"

    fit cubic spline to cumulative hazard function of precipitation and save
    for spline parameters for fitting of precipitation CDFs.
    
    This "flexiknot" version here makes a modification on the number of knots used
    in the spline fit, which is related to the number of samples with positive
    precipitation; more samples, more knots.

    Designed by Tom Hamill, NOAA, with help from Michael Scheuerer, Dec 2020 -
    July 2021

    """

    import os, sys
    from datetime import datetime
    import numpy as np
    import _pickle as cPickle
    from netCDF4 import Dataset
    import _pickle as cPickle
    import scipy.stats as stats
    from scipy.interpolate import LSQUnivariateSpline, splrep, splev
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    from mpl_toolkits.basemap import Basemap, interp
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # =====================================================================

    def set_domain_boundaries(cdomain):

        """ Return (jmin, jmax, imin, imax), the bounding indices on the
            0.25 GEFSv12 reforecast grid that encompass the named domain.
            The bounds were determined from the bounding lat and lon of a
            grib file of the 2.5-km blend output grid.

            cdomain must be 'conus', 'pr', or 'ak'; any other value prints
            a message and exits via sys.exit().
        """
        # ---- (jmin, jmax, imin, imax) for each supported domain

        bounds_by_domain = {
            'conus': (93, 246, 368, 686),
            'pr': (243, 256, 649, 667),
            'ak': (19, 161, 201, 967),
        }

        if cdomain not in bounds_by_domain:
            print ('invalid domain.  Exiting.')
            sys.exit()

        jmin, jmax, imin, imax = bounds_by_domain[cdomain]
        return jmin, jmax, imin, imax

    # =====================================================================

    def get_surrounding_months(cmonth):

        """ Return (cmonth_early, cmonth_late): the three-letter names of
            the months immediately before and after cmonth, wrapping
            around the year end (Jan -> Dec/Feb, Dec -> Nov/Jan).

            An unrecognized month name prints a message and exits via
            sys.exit().
        """
        month_cycle = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

        try:
            imonth = month_cycle.index(cmonth)
        except ValueError:
            print ('invalid month')
            sys.exit()

        # ---- modular arithmetic handles the Dec <-> Jan wrap-around

        cmonth_early = month_cycle[(imonth - 1) % 12]
        cmonth_late = month_cycle[(imonth + 1) % 12]

        return cmonth_early, cmonth_late

    # =====================================================================

    def fraczero_possamps(nsamps, precip_ens):

        """
        Split the sample vector precip_ens into its zero and positive parts.

        Samples <= 0.0 mm are censored.  Each remaining sample is then
        perturbed by a uniform random amount in [-0.001, 0.001) so that
        the discretized input data (~0.1 mm) does not yield lots of tied
        amounts when CDFs are built later; anything pushed to <= 0.0 by
        the perturbation is censored as well.

        Returns (fraction_zero, precip_ens_nonzero, nz):
            fraction_zero       fraction of the input samples that were
                                censored (either pass)
            precip_ens_nonzero  surviving perturbed samples, sorted ascending
            nz                  number of surviving samples

        nsamps is accepted but not used; the sample count is taken from
        len(precip_ens).
        """

        def censor_nonpositive(amounts):
            # remove every entry at or below 0.0 mm
            return np.delete(amounts, np.where(amounts <= 0.0))

        ntotal = len(precip_ens)

        positive = censor_nonpositive(precip_ens)
        jitter = np.random.uniform(low=-0.001, high=0.001, size=len(positive))
        positive = censor_nonpositive(positive + jitter)

        nz = len(positive)
        fraction_zero = float(ntotal - nz) / float(ntotal)

        return fraction_zero, np.sort(positive), nz

    # =====================================================================
    
    def define_knot_locations(nz, precip_samples_nonzero):

        # define_knot_locations:  pick how many interior spline knots to use
        # and which indices of the sorted positive-precipitation sample they
        # sit at, based on the number of positive samples nz.  Small samples
        # get fewer knots, and the knot indices are pushed toward the upper
        # quantiles of the distribution, where an accurate fit matters most.
        #
        # Returns (nknots, query_these_indices, cdf_at_indices), where
        # cdf_at_indices holds 1/(2*nz) + index/nz for each chosen index.
        # precip_samples_nonzero is accepted but not used here.

        # ---- shape parameters of the beta distribution whose quantiles
        #      place the knots; these values emphasize the upper quantiles

        beta_p = 3.5
        beta_q = 1.0

        # ---- one interior knot per 30 positive samples, capped at 9

        nknots = min([9,nz//30])

        # ---- knot k of nknots sits at the beta quantile k/(nknots+1),
        #      scaled to an index into the sorted sample

        query_these_indices = [
            int(nz*stats.beta.ppf(float(iknot)/(nknots+1), beta_p, beta_q))
            for iknot in range(1, nknots+1)]

        cdf_at_indices = [
            (1./(2.*nz)) + float(iloc)/float(nz)
            for iloc in query_these_indices]

        return nknots, query_these_indices, cdf_at_indices
        
    # =====================================================================
        
    
    # ---- set parameters
        
    cmonth_list = ['Jan','Feb','Mar','Apr','May','Jun',\
        'Jul','Aug','Sep','Oct','Nov','Dec']

    # two-digit month numbers; must stay index-aligned with cmonth_list
    # (exactly 12 entries, one per month) so that cmonthnum[imonth_index]
    # is the number of month cmonth.
    cmonthnum = ['01','02','03','04','05','06',\
        '07','08','09','10','11','12']
    print ('cmonth = ', cmonth)
    print (cmonth_list)
    imonth_index = cmonth_list.index(cmonth)

    # index bounds of the domain on the GEFSv12 grid
    jmin, jmax, imin, imax = set_domain_boundaries(cdomain)
    nstride = 1  # grid-point stride used by the fitting loops

    # the training sample pools cmonth with the months on either side
    cmonth_early, cmonth_late = get_surrounding_months(cmonth)
    pflag = False # for print statements
    master_directory = '/Volumes/NBM/'+cdomain+'_gefsv12/precip/netcdf/'
    master_directory_out = '/Volumes/NBM/'+cdomain+'_gefsv12/CDF_spline/'
    nmembers = 5

    # ---- read in the previously generated netCDF files with precipitation
    #      for this month and lead time + surrounding months.  
    #      All members, dates for this month have been
    #      smushed into one leading index, dimension
    #      nsamps, since the date of the forecast within the month and 
    #      the member number is irrelevant for the distribution fitting.
    #      Each file is opened in a with-block so that it is closed even
    #      if one of the reads raises.
   
    ncfile = master_directory + cmonth + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_middle = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
        # lat/lon of the domain subset are taken from the central month's file
        lons_1d = nc.variables['lons_fcst'][imin:imax]
        lats_1d = nc.variables['lats_fcst'][jmin:jmax]
    nsamps_middle, ny_gefsv12, nx_gefsv12 = np.shape(precip_middle)

    ncfile = master_directory + cmonth_early + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_early = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_early, ny_gefsv12, nx_gefsv12 = np.shape(precip_early)

    ncfile = master_directory + cmonth_late + '_apcp_sfc_h' + clead + '.nc'
    print (ncfile)
    with Dataset(ncfile) as nc:
        precip_late = nc.variables['apcp_fcst'][:,jmin:jmax,imin:imax]
    nsamps_late, ny_gefsv12, nx_gefsv12 = np.shape(precip_late)

    # total number of samples per grid point across the three months
    nsamps = nsamps_middle + nsamps_early + nsamps_late

    # ---- more initialization of output storage arrays now that 
    #      we know the array dimensions

    # fraction of samples with zero precipitation at each grid point.
    # np.float64 is used explicitly: the np.float alias was removed from
    # NumPy (it was only ever an alias for the builtin float).
    fzero = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.float64)
   
    # ---- record the start time (reported in the progress message of the
    #      grid-point loop) and allocate the per-grid-point result arrays
    #      that the loop fills in.

    now = datetime.now()
    begin_time = now.strftime("%H:%M:%S")
    plotit = False
 
    # [jy,ix,0,:] and [jy,ix,1,:] hold the two spline vectors when a spline
    # is fit, or the Gamma alpha / beta estimates when the Gamma fit is used
    spline_info_fcst = np.zeros((ny_gefsv12,nx_gefsv12,2,17), dtype=np.float64) 
    # 0 = spline fit, 1 = Gamma fit, -1 = insufficient data
    usegamma_fcst = np.zeros((ny_gefsv12, nx_gefsv12), dtype=np.int32)
    q99 = np.zeros((ny_gefsv12,nx_gefsv12), dtype=np.float32)
    number_knots = np.zeros((ny_gefsv12,nx_gefsv12), dtype=np.int32)
                   
    # ---- loop over grid points.  At each point the samples from the three
    #      months are pooled, split into zero / positive amounts, and then,
    #      depending on the number of positive samples nz:
    #         nz > 100        fit a spline to the empirical cumulative hazard
    #         nz < 10         flag the point as having insufficient data
    #         10 <= nz <= 100 fit a Gamma distribution instead
    for jy in range(0,ny_gefsv12,nstride):
    #for jy in range(528,529):   # (debugging: single row)
        
        # progress message, once per row
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('***** begin time, current time, jy, ny_gefsv12, lat = ',\
            begin_time, current_time, jy, ny_gefsv12, lats_1d[jy])        
        
        for ix in range(0,nx_gefsv12,nstride):
        #for ix in range (465,466):   # (debugging: single column)
            
            # ---- there is a grib round-off error that can give negative
            #      values slightly smaller than teeny precip.  to make sure 
            #      that we don't have either negative values or lots of the 
            #      same tiny values, so subtract teeny_precip
        
            # pool the samples of the central, earlier and later months
            precip_ens_1d_middle = precip_middle[:,jy,ix]
            precip_ens_1d_early = precip_early[:,jy,ix]
            precip_ens_1d_late = precip_late[:,jy,ix]
            precip_ens_1d = np.concatenate((precip_ens_1d_middle, \
                precip_ens_1d_early, precip_ens_1d_late))
        
            # NOTE(review): np.abs(...) is never negative, so the minimum of
            # it and 0.0 is always 0.0; tp is therefore 0.0 and the
            # subtraction below leaves the sample unchanged.  Confirm whether
            # a shift was actually intended.  Non-positive values are
            # censored inside fraczero_possamps regardless.
            tp = np.min( [np.abs(np.min(precip_ens_1d)), 0.0] )
            teeny_precip = tp*np.ones(nsamps)
            precip_ens_1d = precip_ens_1d - teeny_precip[:]
            
            # fraction of zero samples, the sorted positive samples, and
            # the count of positive samples
            fraction_zero, precip_ens_nonzero, nz = \
                fraczero_possamps(nsamps, precip_ens_1d) # return sorted
            fzero[jy,ix] = fraction_zero 
        
            if pflag == True: print ('precip_ens_nonzero = ', precip_ens_nonzero)
            if pflag == True: print ('nz = ', nz)
                
            if nz > 100:

                # ---- spline fit with the focus on knots at higher quantiles

                usegamma_fcst[jy,ix] = 0
                # sorted sample at index (99*nz)//100, i.e. the empirical
                # 99th percentile of the positive amounts
                q99[jy,ix] = precip_ens_nonzero[(99*nz)//100]
                # empirical CDF at the sorted samples, (i + 1/2)/nz, and the
                # matching cumulative hazard -ln(1 - CDF)
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz      
                hazard_function_empirical = -np.log(1.0 - empirical_cdf)
                
                # ---- set the indices in the sorted precipitation sample 
                #      where spline knots are, the CDF at these values,
                #      the number of knots, the precipitation amount at
                #      chosen percentiles.
                    
                nknots, query_these_indices, cdf_at_indices = \
                    define_knot_locations(nz, precip_ens_nonzero)
                empirical_precipvals = precip_ens_nonzero[query_these_indices]

                if pflag == True: print ('nknots = ', nknots)
                if pflag == True: print ('query_these_indices ', query_these_indices)
                if pflag == True: print ('cdf_at_indices = ', cdf_at_indices)

                # spline of cumulative hazard as a function of precipitation
                # amount, with the interior knots supplied via t and the
                # lower end of the fitting interval set to 0 via xb
                spltemp_inv = splrep(precip_ens_nonzero, hazard_function_empirical, \
                    xb=0., task=-1, t = empirical_precipvals)   
                lspline = len(spltemp_inv[0])  # final # knots including @ boundaries
                
                if pflag == True: print ('hazard_function_empirical = ',hazard_function_empirical)
                if pflag == True: print ('spltemp_inv = ', spltemp_inv)
                if pflag == True: print ('lspline = ', lspline)
        
                # --- set the number of knots and splines coefficients into arrays to
                #     be dumped to netCDF file.  Only the first lspline of the
                #     17 slots are written; the rest keep their initial zeros.
                #     (spltemp_inv[0] / spltemp_inv[1] are presumably the knot
                #     vector and coefficients of splrep's returned tuple.)
            
                spline_info_fcst[jy,ix,0,0:lspline] = spltemp_inv[0]
                spline_info_fcst[jy,ix,1,0:lspline] = spltemp_inv[1]
                number_knots[jy,ix] = lspline
        
            elif nz < 10:
            
                # --- over the whole training sample, there were not enough 
                #     nonzero precip values recorded.   Flag this as missing data.
            
                usegamma_fcst[jy,ix] = -1  # flag as insufficient data to do any qmapping
                number_knots[jy,ix] = 0
                spline_info_fcst[jy,ix,0,:] = -99.99
                spline_info_fcst[jy,ix,1,:] = -99.99
                q99[jy,ix] = -99.99

            else:
            
                # ---- too few samples to use splines; fit a single-parameter Gamma 
                #      using Thom (1958) estimator described in Wilks textbook,
                #      Statistical Methods in the Atmospheric Sciences.
                
                usegamma_fcst[jy,ix] = 1  # yes, estimate with Gamma distribution
                number_knots[jy,ix] = 0
                empirical_cdf = 1.0/(2.0*nz) + np.arange(nz)/nz
                pmean = np.mean(precip_ens_nonzero)
                lnxbar = np.log(pmean)
                if pflag == True: print ('pmean, lnxbar = ', pmean, lnxbar)
                meanlnxi = np.mean(np.log(precip_ens_nonzero))
                if pflag == True: print ('meanlnxi = ', meanlnxi)
                # D = ln(mean) - mean(ln x); alpha from D, beta = mean/alpha
                D = lnxbar - meanlnxi
                alpha_hat = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
                beta_hat = pmean / alpha_hat
                if pflag == True: print('alpha_hat, beta_hat = ', alpha_hat, beta_hat)
                spline_info_fcst[jy,ix,0,:] = alpha_hat  # smoosh into the spline array
                spline_info_fcst[jy,ix,1,:] = beta_hat # smoosh into the spline array
                # largest positive sample is stored in place of a 99th percentile
                q99[jy,ix] = precip_ens_nonzero[-1] 
            
                # --- evaluate Dn statistic, goodness of fit.
                #     NOTE(review): y0 and fitted_CDF (and empirical_cdf in
                #     this branch) are computed but not used afterwards here.
            
                y0 = precip_ens_nonzero / beta_hat
                fitted_CDF = stats.gamma.cdf(y0, alpha_hat)
           
    # ---- save spline information to netCDF file
        
    outfile = master_directory_out + cmonth_list[imonth_index]+'_'+cdomain+\
        '_GEFSv12_spline_info_h' + clead + '.nc'
    print ('writing to ',outfile)

    # the with-block closes the file even if one of the writes below fails
    with Dataset(outfile,'w',format='NETCDF4_CLASSIC') as ncout:

        # ---- dimensions and their index variables

        ncout.createDimension('xf',nx_gefsv12)
        xvf = ncout.createVariable('xf','i4',('xf',))
        xvf.long_name = "eastward grid point number on NDFD grid"
        xvf.units = "n/a"

        ncout.createDimension('yf',ny_gefsv12)
        yvf = ncout.createVariable('yf','i4',('yf',))
        yvf.long_name = "northward grid point number on NDFD grid"
        yvf.units = "n/a"

        # 17 matches the last dimension of spline_info_fcst
        ncout.createDimension('xspd',17)
        xspdf = ncout.createVariable('xspd','i4',('xspd',))
        xspdf.long_name = "index for spline dimension"
        xspdf.units = "n/a"

        ncout.createDimension('x2',2)
        x2f = ncout.createVariable('x2','i4',('x2',))
        x2f.long_name = "2nd index for spline dimension"
        x2f.units = "n/a"

        # only the index variable below uses this dimension
        ncout.createDimension('x9',9)
        x9f = ncout.createVariable('x9','i4',('x9',))
        x9f.long_name = "first dimension of nonzero_indices_of_knots"
        x9f.units = "n/a"

        # ---- coordinate and data variables

        lonsa = ncout.createVariable('lons','f4',('xf',))
        lonsa.long_name = "longitude"
        lonsa.units = "degrees_east"

        latsa = ncout.createVariable('lats','f4',('yf',))
        latsa.long_name = "latitude"
        latsa.units = "degrees_north"

        spline_info_out = ncout.createVariable('spline_info',\
            'f4',('yf','xf','x2','xspd'),
            zlib=True,least_significant_digit=6)
        spline_info_out.units = "n/a"
        # trailing space after "from" keeps the two halves from running
        # together as "fromspline"
        spline_info_out.long_name = \
            "Information for computing precipitation from "+\
            "spline (or Gamma CDF for dry points)"
        spline_info_out.missing_value = \
            np.array(-9999.99,dtype=np.float32)   

        fzero_out = ncout.createVariable('fzero','f4',('yf','xf',),
            zlib=True,least_significant_digit=6)
        fzero_out.units = "n/a"
        fzero_out.long_name = "fraction_zero"
        fzero_out.missing_value = np.array(-9999.99,dtype=np.float32)

        usegamma_out = ncout.createVariable('usegamma',\
            'i4',('yf','xf',), zlib=True)
        usegamma_out.units = "n/a"
        usegamma_out.long_name = "1 if fit CDF via Gamma distribution, 0 if not"
        usegamma_out.missing_value = np.array(-99,dtype=np.int32)
    
        quantile_99 = ncout.createVariable('quantile_99',\
            'f8',('yf','xf',), zlib=True)
        quantile_99.units = "mm"
        quantile_99.long_name = "99th percentile of precipitation CDF"
        quantile_99.missing_value = np.array(-99.99,dtype=np.float32)
    
        numberknots_out = ncout.createVariable('number_knots',\
            'i4',('yf','xf',), zlib=True)
        numberknots_out.units = "n/a"
        numberknots_out.long_name = "number of knots when using spline"
        numberknots_out.missing_value = np.array(-99,dtype=np.int32)

        # ---- metadata

        ncout.title = "NDFD domain spline inverse coefficients / Gamma parameters "
        ncout.history = "from CDF fitting code by Tom Hamill, PSL"
        ncout.institution =  "psl.noaa.gov"
        ncout.platform = "n/a"
        ncout.references = "n/a"
    
        # ---- copy the outputs to netCDF structures.

        xvf[:] = range(nx_gefsv12)
        yvf[:] = range(ny_gefsv12)
        xspdf[:] = range(17)
        x2f[:] = range(2)
        x9f[:] = range(9)
        lonsa[:] = lons_1d[:]
        latsa[:] = lats_1d[:]
        spline_info_out[:] = spline_info_fcst[:,:,:,:]
        fzero_out[:] = fzero[:,:]
        usegamma_out[:] = usegamma_fcst[:,:]
        quantile_99[:] = q99[:,:]
        numberknots_out[:] = number_knots[:,:]




                                                                                                         ._KFgain_GEFSv12_biascorr_2019.py                                                                   000775  000765  000024  00000000420 14016263127 016405  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    ;     35:41-35:560                                                                                                                                                                                                                                                KFgain_GEFSv12_biascorr_2019.py                                                                     000775  000765  000024  00000005722 14016263127 016202  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def KFgain_GEFSv12_biascorr_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, lsmask, \
    date_list_forecast, clead, cpath_gain):
    
    """ apply Kalman filter bias correction to forecasts in 2019. 

    Steps sequentially through date_list_forecast[1:], and for each date
    updates the bias estimate beta_3d from the previous date's estimate and
    the new forecast-vs-analysis discrepancy (eqs. 37 and 39 in Dee's Bias
    and Data Assimilation article,
    https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137).

    npts                not used here.
    nlats, nlons        grid dimensions of the fields.
    forecast_3d,
    analyses_3d         forecasts and analyses, indexed [time, lat, lon].
    beta_3d             bias estimates, indexed [time, lat, lon]; updated
                        IN PLACE and returned.
    lsmask              accepted but currently unused.
    date_list_forecast  date strings; characters [4:6] are read as the
                        month and [6:8] as the day of month.
    clead               lead-time string used in the gain file name.
    cpath_gain          directory prefix (with trailing separator) of the
                        pickled Kalman gain files.

    The loop counter idate (0-based position within date_list_forecast[1:])
    is used directly as the time index into the three 3-D arrays.
    NOTE(review): the date string at position idate is therefore
    date_list_forecast[idate+1]; confirm this offset is intended.
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    
    first = True
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        cdd = date[6:8]
        cmm = date[4:6]
        if first or cdd == '01':
            
            # (re)load the gain on the first pass and at each month start
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- define the Kalman gain per eq. (39) in Dick Dee
            #      Bias and Data Assimilation article, 
            #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
    
            gain_infile = cpath_gain + 'GEFSv12_KFgain_'+cmonth+'_lead'+clead+'.cPick'
            # NOTE: unpickling executes arbitrary code; only point
            # cpath_gain at trusted, locally generated gain files.
            with open(gain_infile, 'rb') as inf:
                Kalman_gain_beta_4d = cPickle.load(inf)

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper)
        
        if idate > 0: 
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
        else:
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - forecast_3d[idate,:,:]
        
        # ---- update the bias correction estimate, eq. 37 in Dee.
        #      For every grid point (j,i) this is the sum over (k,l) of
        #      Kalman_gain_beta_4d[j,i,k,l]*obsinc_2d[k,l], evaluated for
        #      all points in one contraction rather than a Python loop
        #      (agrees with the per-point np.sum to floating-point rounding).
        
        gain_times_obsinc = np.einsum('jikl,kl->ji', \
            Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
        
        if idate > 0:
            beta_3d[idate,:nlats,:nlons] = \
                beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
        else:
            # first date: there is no previous estimate to start from
            beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                                          
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print ('finishing KFgain_GEFSv12_biascorr_2019. ', current_time)
    
    return beta_3d                                              ._KFgain_GEFSv12_biascorr_2019_orig.py                                                              000775  000765  000024  00000000411 14016263127 017425  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    p     8:230                                                                                                                                                                                                                                                       KFgain_GEFSv12_biascorr_2019_orig.py                                                                000775  000765  000024  00000005521 14016263127 017217  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def KFgain_GEFSv12_biascorr_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, clead, cpath_gain):
    
    """ apply Kalman filter bias correction to forecasts in 2019,
        using a Kalman gain that is (re)read from disk on the first
        processed date and on every date whose day-of-month is '01'.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions; the work array for the
            observation increment is shaped (nlats, nlons).
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  It is
            updated IN PLACE and the same object is returned.
        date_list_forecast -- sequence of date strings; characters 4:6
            are read as the month and 6:8 as the day (presumably
            yyyymmddhh -- confirm against the caller).
        clead -- lead-time string, used only to build the file name.
        cpath_gain -- prefix prepended verbatim to the gain file name,
            so it must already end with a path separator.

        The gain read from disk is indexed [j, i, k, l]; its last two
        axes are multiplied against the (nlats, nlons) increment.

        Returns beta_3d.
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #
    #      NOTE(review): the loop walks date_list_forecast[1:] while idate
    #      starts at 0, so array row idate is paired with the date string
    #      date_list_forecast[idate+1].  Confirm that this offset is 
    #      intended.
    
    first = True
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        cdd = date[6:8]
        cmm = date[4:6]
        if first == True or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- define the Kalman gain per eq. (39) in Dick Dee
            #      Bias and Data Assimilation article, 
            #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
            #      NOTE: pickle must only be used on trusted, locally
            #      generated gain files.
    
            gain_infile = cpath_gain + 'GEFSv12_KFgain_'+cmonth+'_lead'+clead+'.cPick'
            with open(gain_infile, 'rb') as inf:
                Kalman_gain_beta_4d = cPickle.load(inf)

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper).  On the first date there is no
        #      previous bias estimate, so the raw forecast is used.
        
        if idate > 0: 
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
        else:
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - forecast_3d[idate,:,:]
        
        # ---- gain applied to the increment for every grid point at once:
        #      gain_times_obsinc[j,i] = sum_kl gain[j,i,k,l]*obsinc[k,l].
        #      This replaces an explicit python loop over (i,j).
        
        gain_times_obsinc = np.einsum('jikl,kl->ji', \
            Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
                    
        # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
        if idate > 0:
            beta_3d[idate,:nlats,:nlons] = \
                beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
        else:
            beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                
        print ('idate, date, max, min beta_3d = ', idate, date, \
            np.max(beta_3d[idate,:,:]), np.min(beta_3d[idate,:,:]) )
                                          
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print ('finishing KFgain_GEFSv12_biascorr_2019. ', current_time)
    
    return beta_3d
def KFgain_GEFSv12_biascorr_together_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, clead, cpath_gain):
    
    """ apply Kalman filter bias correction to forecasts in 2019,
        using the 'together' Kalman gain files, which are (re)read 
        from disk on the first processed date and on every date whose
        day-of-month is '01'.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions; the work array for the
            observation increment is shaped (nlats, nlons).
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  It is
            updated IN PLACE and the same object is returned.
        date_list_forecast -- sequence of date strings; characters 4:6
            are read as the month and 6:8 as the day (presumably
            yyyymmddhh -- confirm against the caller).
        clead -- lead-time string, used only to build the file name.
        cpath_gain -- prefix prepended verbatim to the gain file name,
            so it must already end with a path separator.

        The gain read from disk is indexed [j, i, k, l]; its last two
        axes are multiplied against the (nlats, nlons) increment.

        Returns beta_3d.
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #
    #      NOTE(review): the loop walks date_list_forecast[1:] while idate
    #      starts at 0, so array row idate is paired with the date string
    #      date_list_forecast[idate+1].  Confirm that this offset is 
    #      intended.
    
    first = True
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        cdd = date[6:8]
        cmm = date[4:6]
        if first == True or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- define the Kalman gain per eq. (39) in Dick Dee
            #      Bias and Data Assimilation article, 
            #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
            #      NOTE: pickle must only be used on trusted, locally
            #      generated gain files.
    
            gain_infile = cpath_gain + 'GEFSv12_KFgain_together_'+cmonth+'_lead'+clead+'.cPick'
            with open(gain_infile, 'rb') as inf:
                Kalman_gain_beta_4d = cPickle.load(inf)

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper).  On the first date there is no
        #      previous bias estimate, so the raw forecast is used.
        
        if idate > 0: 
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
        else:
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - forecast_3d[idate,:,:]
        
        # ---- gain applied to the increment for every grid point at once:
        #      gain_times_obsinc[j,i] = sum_kl gain[j,i,k,l]*obsinc[k,l].
        #      This replaces an explicit python loop over (i,j).
        
        gain_times_obsinc = np.einsum('jikl,kl->ji', \
            Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
                    
        # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
        if idate > 0:
            beta_3d[idate,:nlats,:nlons] = \
                beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
        else:
            beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                                          
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    # the message previously named KFgain_GEFSv12_biascorr_2019 (copy/paste)
    print ('finishing KFgain_GEFSv12_biascorr_together_2019. ', current_time)
    
    return beta_3d
def Kalman_filter_biascorr_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, clead, cpath_gain):
    
    """ apply Kalman filter bias correction to forecasts in 2019,
        using cold-season and warm-season Kalman gains computed from
        2018 data.  The cold-season gain is used from the first 
        processed date, the warm-season gain from the date string
        '2019040100', and the cold-season gain again from '2019100100'.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions; the work array for the
            observation increment is shaped (nlats, nlons).
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  It is
            updated IN PLACE and the same object is returned.
        date_list_forecast -- sequence of date strings, compared 
            verbatim against '2019040100' and '2019100100'.
        clead -- lead-time string; must be one of '24', '48', '72',
            '96', '120'.  Selects the localization radii and is part
            of the gain file names.
        cpath_gain -- prefix prepended verbatim to the gain file names,
            so it must already end with a path separator.

        Returns beta_3d.

        Raises ValueError if clead is not one of the supported leads
        (previously this surfaced as an unbound-variable error when
        the file names were built).
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    # -------------------------------------------------------------
    
    # ---- based roughly on what had the lowest error in 2018, set the Kalman
    #      localization radii used.  Entries are 
    #      (flocal_warm, blocal_warm, flocal_cold, blocal_cold); they are 
    #      strings because they are spliced into the gain file names.
    
    localization_radii = { \
        '24': ('400.0', '600.0', '400.0', '600.0'), \
        '48': ('400.0', '600.0', '400.0', '600.0'), \
        '72': ('400.0', '1200.0', '600.0', '1200.0'), \
        '96': ('400.0', '1200.0', '600.0', '1200.0'), \
        '120': ('200.0', '1200.0', '600.0', '1200.0')}
    if clead not in localization_radii:
        raise ValueError('unsupported clead ' + repr(clead) + \
            '; expected one of ' + ', '.join(sorted(localization_radii, key=int)))
    flocal_warm, blocal_warm, flocal_cold, blocal_cold = \
        localization_radii[clead]
    
    # ---- define the Kalman gain per eq. (39) in Dick Dee
    #      Bias and Data Assimilation article, 
    #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
    #      NOTE: pickle must only be used on trusted, locally generated
    #      gain files.
   
    gainfile_cold = cpath_gain + '2018_KFgain_flocal'+\
        flocal_cold+'_blocal'+blocal_cold+\
        '_2018_cold_lead'+clead+'.cPick'
    gainfile_warm = cpath_gain + '2018_KFgain_flocal'+\
        flocal_warm+'_blocal'+blocal_warm+\
        '_2018_warm_lead'+clead+'.cPick'

    print ('reading cold_season Kalman gain from ', gainfile_cold) 
    with open(gainfile_cold, 'rb') as inf:
        Kalman_gain_beta_4d_cold = cPickle.load(inf)
    print ('done reading')
    
    print ('reading warm_season Kalman gain from ', gainfile_warm) 
    with open(gainfile_warm, 'rb') as inf:
        Kalman_gain_beta_4d_warm = cPickle.load(inf)
    print ('done reading')
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #
    #      NOTE(review): the loop walks date_list_forecast[1:] while idate
    #      starts at 0, so array row idate is paired with the date string
    #      date_list_forecast[idate+1].  Confirm that this offset is 
    #      intended.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        # ---- determine which gain file to use.  The first processed
        #      date always starts with the cold-season gain, whatever
        #      its calendar date is; on all other dates the gain from 
        #      the previous iteration is kept unless a season switch
        #      date is hit.
        
        if idate == 0 : #'2019010100':
            Kalman_gain_beta_4d = Kalman_gain_beta_4d_cold
        elif date == '2019040100':
            Kalman_gain_beta_4d = Kalman_gain_beta_4d_warm
        elif date == '2019100100':
            Kalman_gain_beta_4d = Kalman_gain_beta_4d_cold

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper).  On the first date there is no
        #      previous bias estimate, so the raw forecast is used.
        
        if idate > 0: 
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
        else:
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - forecast_3d[idate,:,:]
        
        # ---- gain applied to the increment for every grid point at once:
        #      gain_times_obsinc[j,i] = sum_kl gain[j,i,k,l]*obsinc[k,l].
        #      This replaces an explicit python loop over (i,j).
        
        gain_times_obsinc = np.einsum('jikl,kl->ji', \
            Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
                    
        # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
        if idate > 0:
            beta_3d[idate,:nlats,:nlons] = \
                beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
        else:
            beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                
        print ('idate, date, max, min beta_3d = ', idate, date, \
            np.max(beta_3d[idate,:,:]), np.min(beta_3d[idate,:,:]) )
                                          
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print ('finishing Kalman_filter_biascorr_2019. ', current_time)
    
    return beta_3d
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `         54:401378                                                                                                                                                                                                                                                   Kalman_filter_biascorrection.py                                                                     000775  000765  000024  00000010064 14016263127 017200  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def Kalman_filter_biascorrection(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, R, Bx, Bbeta, \
    savefile, already_decay):
    
    """ apply decaying average Kalman filter bias correction 
        to forecasts.  Note the mix of some arrays shapes; 
        Kalman gain is shaped (nlats*nlons, nlats*nlons)
        when first computed and is then reformatted to a 4D array
        indexed [j, i, k, l] by reformat_gain_to_4d_f90.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions.
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  When
            already_decay is not True it is updated IN PLACE.
        date_list_forecast -- sequence of dates; only its length 
            (minus the first entry) drives the loop.
        R, Bx, Bbeta -- square matrices that are summed, inverted and
            combined as Bbeta * inv(R + Bx + Bbeta) to form the gain.
        savefile -- pickle file holding beta_3d; read when 
            already_decay == True, otherwise (over)written.
        already_decay -- if True, skip the computation and return the
            beta_3d stored in savefile instead.

        Returns beta_3d (the array loaded from savefile, or the 
        updated input array).
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    # -------------------------------------------------------------
    
    if already_decay == True:
        
        # NOTE: pickle must only be used on trusted, locally generated
        # files.
        
        print ('reading beta_3d from ', savefile) 
        with open(savefile, 'rb') as inf:
            beta_3d = cPickle.load(inf)
        print ('done reading')
    else:  
        
        # ---- the compiled reformatting routine is only needed on this
        #      branch, so it is imported here; re-reading a saved 
        #      beta_3d then works without the compiled extension.
        
        from reformat_gain_to_4d_f90 import reformat_gain_to_4d_f90
        
        # ---- define the Kalman gain per eq. (39) in Dick Dee
        #      Bias and Data Assimilation article, 
        #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
        
        Bbeta_plus_Bx_plus_R = R + Bx + Bbeta
        Bbeta_plus_Bx_plus_R_inv = \
            np.linalg.inv(Bbeta_plus_Bx_plus_R)
        Kalman_gain_beta = np.matmul(Bbeta, Bbeta_plus_Bx_plus_R_inv)
        Kalman_gain_beta_4d = reformat_gain_to_4d_f90(\
            Kalman_gain_beta, nlats, nlons)        
    
        # ---- sequentially loop through dates during the sample, 
        #      updating the previous day's bias correction 
        #      to the new days fcst vs. obs discrepancy.
        #
        #      NOTE(review): the loop walks date_list_forecast[1:] while
        #      idate starts at 0, so array row idate is paired with 
        #      date_list_forecast[idate+1].  Confirm that this offset 
        #      is intended.
    
        obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
        for idate, date in enumerate(date_list_forecast[1:]):

            # ---- calculate the "observation" increment (term in parentheses
            #      in eq. 37 in Dee paper).  On the first date there is
            #      no previous bias estimate, so the raw forecast is used.
            
            if idate > 0: 
                obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                    (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
            else:
                obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                    forecast_3d[idate,:,:]
        
            # ---- gain applied to the increment for every grid point at
            #      once: gain_times_obsinc[j,i] = 
            #      sum_kl gain[j,i,k,l]*obsinc[k,l].  This replaces an 
            #      explicit python loop over (i,j).
            
            gain_times_obsinc = np.einsum('jikl,kl->ji', \
                Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
                    
            # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
            if idate > 0:
                beta_3d[idate,:nlats,:nlons] = \
                    beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
            else:
                beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                                          
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('ending decay avg Kalman_filter_biascorrection.py.'+\
            '  Current time = ', current_time)
        print ('writing beta_3d to ', savefile) 
        with open(savefile, 'wb') as ouf:
            cPickle.dump(beta_3d, ouf)
        print ('done writing')
    
    return beta_3d
                                                                                                                                                                        
def Kalman_filter_biascorrection2(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, R, Bbeta,  \
    savefile, already_decay):
    
    """ apply decaying average Kalman filter bias correction 
        to forecasts.  Note the mix of some arrays shapes; 
        Kalman gain is shaped (nlats*nlons, nlats*nlons)
        when first computed and is then reformatted to a 4D array
        indexed [j, i, k, l] by reformat_gain_to_4d_f90.  This variant
        forms the gain from R and Bbeta only.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions.
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  When
            already_decay is not True it is updated IN PLACE.
        date_list_forecast -- sequence of dates; only its length 
            (minus the first entry) drives the loop.
        R, Bbeta -- square matrices that are summed, inverted and
            combined as Bbeta * inv(R + Bbeta) to form the gain.
        savefile -- pickle file holding beta_3d; read when 
            already_decay == True, otherwise (over)written.
        already_decay -- if True, skip the computation and return the
            beta_3d stored in savefile instead.

        Returns beta_3d (the array loaded from savefile, or the 
        updated input array).
    """
    import numpy as np
    from datetime import datetime
    import _pickle as cPickle
    
    # -------------------------------------------------------------
    
    if already_decay == True:
        
        # NOTE: pickle must only be used on trusted, locally generated
        # files.
        
        print ('   reading beta_3d from ', savefile) 
        with open(savefile, 'rb') as inf:
            beta_3d = cPickle.load(inf)
        print ('   done reading')
    else:  
        
        # ---- the compiled reformatting routine is only needed on this
        #      branch, so it is imported here; re-reading a saved 
        #      beta_3d then works without the compiled extension.
        
        from reformat_gain_to_4d_f90 import reformat_gain_to_4d_f90
        
        # ---- define the Kalman gain per eq. (39) in Dick Dee
        #      Bias and Data Assimilation article, 
        #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
    
        print ('   max, min Bbeta = ',np.max(Bbeta), np.min(Bbeta))
        Bbeta_plus_R = R + Bbeta
        Bbeta_plus_R_inv = \
            np.linalg.inv(Bbeta_plus_R)
        Kalman_gain_beta = np.matmul(Bbeta, Bbeta_plus_R_inv)
        print ('   max, min Kalman_gain_beta  = ', \
            np.max(Kalman_gain_beta), np.min(Kalman_gain_beta))
        Kalman_gain_beta_4d = reformat_gain_to_4d_f90(\
            Kalman_gain_beta, nlats, nlons)        
    
        # ---- sequentially loop through dates during the sample, 
        #      updating the previous day's bias correction 
        #      to the new days fcst vs. obs discrepancy.
        #
        #      NOTE(review): the loop walks date_list_forecast[1:] while
        #      idate starts at 0, so array row idate is paired with 
        #      date_list_forecast[idate+1].  Confirm that this offset 
        #      is intended.
    
        obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
        for idate, date in enumerate(date_list_forecast[1:]):

            # ---- calculate the "observation" increment (term in parentheses
            #      in eq. 37 in Dee paper).  On the first date there is
            #      no previous bias estimate, so the raw forecast is used.
            
            if idate > 0: 
                obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                    (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
            else:
                obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                    forecast_3d[idate,:,:]
        
            # ---- gain applied to the increment for every grid point at
            #      once: gain_times_obsinc[j,i] = 
            #      sum_kl gain[j,i,k,l]*obsinc[k,l].  This replaces an 
            #      explicit python loop over (i,j).
            
            gain_times_obsinc = np.einsum('jikl,kl->ji', \
                Kalman_gain_beta_4d[:nlats,:nlons], obsinc_2d)
                    
            # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
            if idate > 0:
                beta_3d[idate,:nlats,:nlons] = \
                    beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
            else:
                beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
                                          
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('   ending decay avg Kalman_filter_biascorrection.py.'+\
            '  Current time = ', current_time)
        print ('   writing beta_3d to ', savefile) 
        with open(savefile, 'wb') as ouf:
            cPickle.dump(beta_3d, ouf)
        print ('   done writing')
    
    return beta_3d
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `        10:150                                                                                                                                                                                                                                                      Kalman_filter_biascorrection_2018gain.py                                                            000775  000765  000024  00000003515 14016263127 020514  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def Kalman_filter_biascorrection_2018gain(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, Kalman_gain_beta_4D):
    
    """ apply Kalman filter bias correction to forecasts, using a
        Kalman gain supplied by the caller.

        Arguments:
        npts -- not used in this routine; kept so existing callers
            keep working.
        nlats, nlons -- grid dimensions; the work array for the
            observation increment is shaped (nlats, nlons).
        forecast_3d, analyses_3d -- arrays indexed [idate, j, i].
        beta_3d -- bias-correction array indexed [idate, j, i].  It is
            updated IN PLACE and the same object is returned.
        date_list_forecast -- sequence of dates; only its length 
            (minus the first entry) drives the loop.
        Kalman_gain_beta_4D -- gain array indexed [j, i, k, l]; its 
            last two axes are multiplied against the (nlats, nlons)
            increment.

        Returns beta_3d.
    """
    import numpy as np
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #
    #      NOTE(review): the loop walks date_list_forecast[1:] while idate
    #      starts at 0, so array row idate is paired with 
    #      date_list_forecast[idate+1].  Confirm that this offset is 
    #      intended.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper).  On the first date there is no
        #      previous bias estimate, so the raw forecast is used.
            
        if idate > 0: 
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
                (forecast_3d[idate,:,:] - beta_3d[idate-1,:,:])
        else:
            obsinc_2d[:,:] = analyses_3d[idate,:,:] - forecast_3d[idate,:,:]
        
        # ---- gain applied to the increment for every grid point at once:
        #      gain_times_obsinc[j,i] = sum_kl gain[j,i,k,l]*obsinc[k,l].
        #      This replaces an explicit python loop over (i,j).
        
        gain_times_obsinc = np.einsum('jikl,kl->ji', \
            Kalman_gain_beta_4D[:nlats,:nlons], obsinc_2d)
                    
        # ---- update the bias correction estimate, eq. 37 in Dee. 
                    
        if idate > 0:
            beta_3d[idate,:nlats,:nlons] = \
                beta_3d[idate-1,:nlats,:nlons] - gain_times_obsinc
        else:
            beta_3d[idate,:nlats,:nlons] = -gain_times_obsinc
    
    return beta_3d
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    Y    2:5-2:420                                                                                                                                                                                                                                                   Kalman_filter_biascorrection_savegain.py                                                            000775  000765  000024  00000011162 14016263127 021055  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def Kalman_filter_biascorrection_savegain(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, R, Bx, Bbeta, gainfile, already):
    
    """ apply Kalman filter bias correction to forecasts, caching the
        Kalman gain in a pickle file so later calls can reuse it.  Note
        the mix of some arrays shapes; the Kalman gain is first formed as
        a 2D matrix shaped (nlats*nlons, nlats*nlons) and then reformatted
        to a 4D array that is indexed [j,i,j2,i2] below.
        
        npts : not referenced in this routine.
        nlats, nlons : grid dimensions.
        forecast_3d, analyses_3d : arrays indexed [idate,j,i].
        beta_3d : array indexed [idate,j,i]; updated IN PLACE with the
            bias-correction estimate for each processed date, and returned.
        date_list_forecast : sequence of dates; len()-1 updates are made.
        R, Bx, Bbeta : 2D matrices that are summed and inverted to form
            the gain.  Only used when already == False.
        gainfile : path of the pickle file holding the 4D Kalman gain.
        already : False -> compute the gain and write it to gainfile;
            otherwise the gain is read back from gainfile.
    """
    import numpy as np
    import _pickle as cPickle
    from update_beta import update_beta
    
    # ---- define the Kalman gain per eq. (39) in Dick Dee
    #      Bias and Data Assimilation article, 
    #      https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137
    
    print (update_beta.__doc__)
    if already == False:   
        
        # ---- the compiled reformatting helper is only needed when the gain
        #      is recomputed, so it is imported here rather than on every call.
        #      NOTE(review): presumably it fills 
        #      gain_4D[j1,i1,j2,i2] = gain_2D[i1*nlats+j1, i2*nlats+j2], as the
        #      pure-python loop it replaced did -- confirm against the f90 source.
        
        from reformat_gain_to_4d_f90 import reformat_gain_to_4d_f90
        Bbeta_plus_Bx_plus_R = R + Bx + Bbeta
        Bbeta_plus_Bx_plus_R_inv = np.linalg.inv(Bbeta_plus_Bx_plus_R)
        Kalman_gain_beta = np.matmul(Bbeta, Bbeta_plus_Bx_plus_R_inv)
        Kalman_gain_beta_4d = reformat_gain_to_4d_f90(Kalman_gain_beta, nlats, nlons)  
    
        print ('writing Kalman gain to ', gainfile) 
        with open(gainfile, 'wb') as ouf:
            cPickle.dump(Kalman_gain_beta_4d, ouf)
        print ('done writing')
    else:
        
        # ---- the gain file is assumed to be one this routine wrote itself;
        #      never point gainfile at an untrusted pickle.
        
        print ('reading Kalman gain from ', gainfile) 
        with open(gainfile, 'rb') as inf:
            Kalman_gain_beta_4d = cPickle.load(inf)
        print ('done reading')
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #      NOTE(review): the loop runs over date_list_forecast[1:], so the
    #      `date` that is printed is date_list_forecast[idate+1] while the
    #      arrays are indexed with idate, and the last time index of beta_3d
    #      is never written.  Confirm this offset is intended.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        # ---- on the first date there is no previous estimate to carry over.
        
        if idate > 0: 
            beta_previous = beta_3d[idate-1,:,:]
        else:
            beta_previous = 0.0
            
        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper)
        
        obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
            (forecast_3d[idate,:,:] - beta_previous)
        
        # ---- update the bias correction estimate, eq. 37 in Dee.  The 
        #      einsum contracts the last two gain dimensions against the 
        #      increment for every [j,i] at once, replacing the former 
        #      python loop over grid points (sum of gain[j,i,:,:]*obsinc).
        
        beta_3d[idate,:,:] = beta_previous - \
            np.einsum('jikl,kl->ji', Kalman_gain_beta_4d, obsinc_2d)
        
        print ('idate, date, max, min beta_3d = ', idate, date, \
            np.max(beta_3d[idate,:,:]), np.min(beta_3d[idate,:,:]) )
    
    return beta_3d                                                                                                                                                                                                                                                                                                                                                                                                              ._Kalman_filter_biascorrection_savegain2.py                                                         000775  000765  000024  00000000412 14016263127 021350  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    f    50:590                                                                                                                                                                                                                                                      Kalman_filter_biascorrection_savegain2.py                                                           000775  000765  000024  00000010565 14016263127 021145  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def Kalman_filter_biascorrection_savegain2(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, R, Bbeta, b_beta_var, gainfile, already):
    
    """ apply Kalman filter-like bias correction to forecasts, caching
        the gain in a pickle file so later calls can reuse it.  Note
        the mix of some arrays shapes; the Kalman gain is first formed as
        a 2D matrix shaped (nlats*nlons, nlats*nlons) and then reformatted
        to a 4D array that is indexed [j,i,j2,i2] below.
        
        npts : not referenced in this routine.
        nlats, nlons : grid dimensions.
        forecast_3d, analyses_3d : arrays indexed [idate,j,i].
        beta_3d : array indexed [idate,j,i]; updated IN PLACE with the
            bias-correction estimate for each processed date, and returned.
        date_list_forecast : sequence of dates; len()-1 updates are made.
        R, Bbeta, b_beta_var : inputs to the gain; the bias covariance 
            used is Bbeta*b_beta_var*0.01.  Only used when already == False.
        gainfile : path of the pickle file holding the 4D Kalman gain.
        already : False -> compute the gain and write it to gainfile;
            otherwise the gain is read back from gainfile.
    """
    
    import numpy as np
    import _pickle as cPickle
    
    if already == False:   
        
        # ---- the compiled reformatting helper is only needed when the gain
        #      is recomputed, so it is imported here rather than on every call.
        
        from reformat_gain_to_4d_f90 import reformat_gain_to_4d_f90
        
        # ---- scaled bias covariance, formed once and used in both the
        #      innovation covariance and the gain numerator.  The 0.01 
        #      factor is inherited from the original code; its rationale 
        #      is not documented here.
        
        Bbeta_scaled = Bbeta*b_beta_var*0.01
        Bbeta_plus_R = R + Bbeta_scaled
        Bbeta_plus_R_inv = np.linalg.inv(Bbeta_plus_R)
        Kalman_gain_beta = np.matmul(Bbeta_scaled, Bbeta_plus_R_inv)
        print ('   max, min Bbeta = ',np.max(Bbeta), np.min(Bbeta))
        print ('   max, min Kalman_gain_beta  = ', \
            np.max(Kalman_gain_beta), np.min(Kalman_gain_beta))
        Kalman_gain_beta_4d = reformat_gain_to_4d_f90(Kalman_gain_beta, nlats, nlons)  
    
        print ('   writing Kalman gain to ', gainfile) 
        with open(gainfile, 'wb') as ouf:
            cPickle.dump(Kalman_gain_beta_4d, ouf)
        print ('   done writing')
    else:
        
        # ---- the gain file is assumed to be one this routine wrote itself;
        #      never point gainfile at an untrusted pickle.
        
        print ('   reading Kalman gain from ', gainfile) 
        with open(gainfile, 'rb') as inf:
            Kalman_gain_beta_4d = cPickle.load(inf)
        print ('   done reading')
         
    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    #      NOTE(review): the loop runs over date_list_forecast[1:], so the
    #      `date` that is printed is date_list_forecast[idate+1] while the
    #      arrays are indexed with idate, and the last time index of beta_3d
    #      is never written.  Confirm this offset is intended.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast[1:]):

        # ---- on the first date there is no previous estimate to carry over.
        
        if idate > 0: 
            beta_previous = beta_3d[idate-1,:,:]
        else:
            beta_previous = 0.0
            
        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper)
        
        obsinc_2d[:,:] = analyses_3d[idate,:,:] - \
            (forecast_3d[idate,:,:] - beta_previous)
        
        # ---- update the bias correction estimate.  The einsum contracts
        #      the last two gain dimensions against the increment for every
        #      [j,i] at once, replacing the former python loop over grid 
        #      points (sum of gain[j,i,:,:]*obsinc).
        
        beta_3d[idate,:,:] = beta_previous - \
            np.einsum('jikl,kl->ji', Kalman_gain_beta_4d, obsinc_2d)
        
        print ('idate, date, max, min beta_3d = ', idate, date, \
            np.max(beta_3d[idate,:,:]), np.min(beta_3d[idate,:,:]) )
    
    return beta_3d                                                                                                                                           ._MOS_forecast_2019.py                                                                              000775  000765  000024  00000000414 14016263127 014554  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `        80:13966                                                                                                                                                                                                                                                    MOS_forecast_2019.py                                                                                000775  000765  000024  00000006743 14016263127 014352  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def MOS_forecast_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, lsmask, \
    date_list_forecast, clead, cpath_forecast, \
    cpath_era5):
    
    """ apply MOS forecast regression procedure in 2019.   For each date,
        the regressed value is slope*forecast + intercept, using the 
        monthly coefficients read from file, and the bias correction is 
        lsmask*(forecast - regressed).
        
        npts, nlats, nlons : not referenced in this routine.
        forecast_3d, analyses_3d : arrays indexed [idate,j,i].
        beta_3d : array indexed [idate,j,i]; updated IN PLACE and returned.
        lsmask : 2D array multiplying the bias correction.
        date_list_forecast : date strings; characters [4:6] are read as
            the month and [6:8] as the day.
        clead : forecast lead as a string, used in the file names.
        cpath_forecast : prefix (directory with trailing separator) of the
            MOS_slope_intercept_<Mon>_lead=<clead>.cPick files.
        cpath_era5 : kept for call compatibility only.   The ERA5
            climatology that used to be loaded from it was never used 
            (the anomaly-based regression is disabled), so it is no 
            longer read.
            
        When clead == '24', the forecast/analysis pairs at grid point
        [20,40] for July and February are also written to two pickle 
        files in the current working directory.
    """
    import numpy as np
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
         
    # ---- sequentially loop through dates during the sample, computing
    #      the regression-based bias correction for each.
    
    fcollect = []
    acollect = []
    fcollect_feb = []
    acollect_feb = []
    first = True
    for idate, date in enumerate(date_list_forecast):

        cdd = date[6:8]
        cmm = date[4:6]
        if first or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- read the MOS regression coefficient data; this is redone
            #      on the first day of every month.
            
            infile = cpath_forecast+'MOS_slope_intercept_'+cmonth+\
                '_lead='+clead+'.cPick'
            with open(infile,'rb') as inf:
                slope = cPickle.load(inf)
                intercept = cPickle.load(inf)
            
        ftoday = forecast_3d[idate,:,:] 
        atoday = analyses_3d[idate,:,:] 
        regressed = slope[:,:]*ftoday[:,:] + intercept[:,:]
        beta_3d[idate,:,:] = lsmask[:,:]*(ftoday[:,:] - regressed[:,:])
        
        # ---- diagnostic collection at one hard-coded grid point
        #      (presumably the point nearest Boulder, judging from the
        #      output file names below).
        
        if cmonth == 'Jul' and clead == '24':
            fcollect.append(ftoday[20,40])
            acollect.append(atoday[20,40])
        if cmonth == 'Feb' and clead == '24':
            fcollect_feb.append(ftoday[20,40])
            acollect_feb.append(atoday[20,40])
            print ('f,o collection for idate, date = ',ftoday[20,40], atoday[20,40], idate,date)
        
    if clead == '24':
        
        # ---- analyses are dumped first, then forecasts.
        
        outfile = 'boulder_july_data_2019_lead='+clead+'h.cPick'
        with open(outfile,'wb') as ouf:
            cPickle.dump(np.array(acollect), ouf)
            cPickle.dump(np.array(fcollect), ouf)
        
        outfile = 'boulder_feb_data_2019_lead='+clead+'h.cPick'
        with open(outfile,'wb') as ouf:
            cPickle.dump(np.array(acollect_feb), ouf)
            cPickle.dump(np.array(fcollect_feb), ouf)
    
    return beta_3d                             ._MOS_multiple_regr_2019.py                                                                         000775  000765  000024  00000000420 14016263127 015615  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `        2:5-2:271487                                                                                                                                                                                                                                                MOS_multiple_regr_2019.py                                                                           000775  000765  000024  00000010630 14016263127 015404  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def MOS_multiple_regr_2019(npts, nlats, nlons, \
    analyses_3d, forecast_3d, beta_decay_3d, beta_qmap_3d, beta_3d, \
    lsmask, date_list_forecast, clead, cpath_forecast, \
    cpath_era5):
    
    """ apply MOS multiple regression procedure in 2019.   For each date
        the regressed value is
        
            intercept + slope*forecast + beta_slope*beta_decay 
                + qmap_slope*beta_qmap
                
        with monthly coefficients read from file, and the bias correction 
        is lsmask*(forecast - regressed).
        
        npts : not referenced in this routine.
        nlats, nlons : grid dimensions (used to size the zero field that
            stands in for the lagged predictor on the first dates).
        analyses_3d : not referenced in this routine.
        forecast_3d, beta_qmap_3d : arrays indexed [idate,j,i].
        beta_decay_3d : array indexed [idate,j,i]; the value used for a
            date is the one from iskip dates earlier.   Not modified.
        beta_3d : array indexed [idate,j,i]; updated IN PLACE and returned.
        lsmask : 2D array multiplying the bias correction.
        date_list_forecast : date strings; characters [4:6] are read as
            the month and [6:8] as the day.
        clead : forecast lead in hours, as a string.
        cpath_forecast : prefix (directory with trailing separator) of the
            MOS_multiple_regr_<Mon>_lead=<clead>.cPick files.
        cpath_era5 : kept for call compatibility only.   The ERA5
            climatology that used to be loaded from it was never used 
            (the anomaly-based regression is disabled), so it is no 
            longer read.
    """
    import numpy as np
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    
    # ---- number of dates to look back for the lagged beta_decay
    #      predictor: whole days of lead, plus one more for the leads 
    #      listed here.
    
    iskip = int(clead) // 24
    if clead in ('12', '36', '60', '84', '108'):
        iskip = iskip+1
         
    # ---- sequentially loop through dates during the sample, computing
    #      the regression-based bias correction for each.
    
    first = True
    
    # ---- stand-in for the lagged predictor on the first iskip dates.  
    #      Kept as its own array so that beta_decay_3d can never be 
    #      written through an alias.
    
    beta_zero = np.zeros((nlats, nlons), dtype=np.float64)

    for idate, date in enumerate(date_list_forecast):

        cdd = date[6:8]
        cmm = date[4:6]
        if first or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- read the MOS regression coefficient data; this is redone
            #      on the first day of every month.
            
            infile = cpath_forecast+'MOS_multiple_regr_'+cmonth+\
                '_lead='+clead+'.cPick'
            print (idate, date, infile)
            with open(infile,'rb') as inf:
                regr_coefs = cPickle.load(inf)
            intercept = regr_coefs[0,:,:]
            slope = regr_coefs[1,:,:]
            beta_slope = regr_coefs[2,:,:]
            qmap_slope = regr_coefs[3,:,:]
            
        ftoday = forecast_3d[idate,:,:] 
        
        if idate - iskip >= 0:
            beta_today = beta_decay_3d[idate-iskip,:,:]
        else:
            beta_today = beta_zero
        qmap_today = beta_qmap_3d[idate,:,:]

        regressed = intercept[:,:] + slope[:,:]*ftoday[:,:] + \
            beta_slope[:,:]*beta_today[:,:] + \
            qmap_slope[:,:]*qmap_today[:,:] 
        beta_3d[idate,:,:] = lsmask[:,:]*(ftoday[:,:]-regressed[:,:])
        
    return beta_3d                                                                                                        ._MOS_multiple_regr_soilw_2019.py                                                                   000775  000765  000024  00000000412 14016263127 017033  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `        54:470                                                                                                                                                                                                                                                      MOS_multiple_regr_soilw_2019.py                                                                     000775  000765  000024  00000005625 14016263127 016631  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def MOS_multiple_regr_soilw_2019(npts, nlats, nlons, \
    analyses_3d, forecast_3d, forecast_3d_soilw, forecast_3d_cloud, \
    beta_3d, lsmask, date_list_forecast, clead, cpath_forecast):
    
    """ apply MOS multiple regression procedure in 2019. 
    """
    import numpy as np
    from datetime import datetime
    import sys
    import _pickle as cPickle
    from dateutils import daterange, dateshift, dayofyear, splitdate
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    iskip = int(clead) // 24
    if clead == '12' or clead == '36' or clead == '60' or clead == '84' or clead == '108':
        iskip = iskip+1
         

    # ---- sequentially loop through dates during the sample, updating
    #      the previous day's bias correction to the new days fcst vs. obs
    #      discrepancy.
    
    first = True
    ndates = int(len(date_list_forecast))
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    beta_today = np.zeros((nlats, nlons), dtype=np.float64)


    #for idate, date in enumerate(date_list_forecast[1:]):
    for idate, date in enumerate(date_list_forecast):

        cdd = date[6:8]
        cmm = date[4:6]
        cyearf = date[0:4]
        if first == True or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- read the MOS regression coefficient data
            
            infile = cpath_forecast+'MOS_soil_cloud_'+cmonth+\
                '_lead='+clead+'.cPick'
            print (idate, date, infile)
            inf = open(infile,'rb')
            regr_coefs = cPickle.load(inf)
            inf.close()
            intercept = regr_coefs[0,:,:]
            slope = regr_coefs[1,:,:]
            soilw_slope = regr_coefs[2,:,:]
            cloud_slope = regr_coefs[3,:,:]
            soil_interact = regr_coefs[4,:,:]
            cloud_interact = regr_coefs[5,:,:]   
            cloud_soil_interact = regr_coefs[6,:,:]  
            
        # ---- determine the julian day

        imm = int(cmm)
        idd = int(cdd)
        iyear_full = int(cyearf)
        julday = dayofyear(iyear_full, imm, idd) - 1
        if julday > 364: julday = 364 
         
        ftoday = forecast_3d[idate,:,:] 
        ftoday_soilw = forecast_3d_soilw[idate,:,:] 
        ftoday_cloud = forecast_3d_cloud[idate,:,:] 
        atoday = analyses_3d[idate,:,:] 
    
        regressed = intercept[:,:] + slope[:,:]*ftoday[:,:] + \
            soilw_slope[:,:]*ftoday_soilw[:,:] + \
            cloud_slope[:,:]*ftoday_cloud[:,:] + \
            soil_interact[:,:]*ftoday[:,:]*ftoday_soilw[:,:] + \
            cloud_interact[:,:]*ftoday[:,:]*ftoday_cloud[:,:] + \
            cloud_soil_interact[:,:]*ftoday_soilw[:,:]*ftoday_cloud[:,:] 
        beta_3d[idate,:,:] = lsmask[:,:]*(ftoday[:,:]-regressed[:,:])
        
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    
    return beta_3d                                                                                                           ._analog_forecast_2019.py                                                                           000775  000765  000024  00000000414 14016263133 015354  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    EQ)    75:51877                                                                                                                                                                                                                                                    analog_forecast_2019.py                                                                             000775  000765  000024  00000007011 14016263133 015137  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def analog_forecast_2019(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, lsmask, \
    date_list_forecast, clead, cpath_forecast, \
    cpath_era5):
    
    """ apply an analog bias-correction procedure in 2019.   At every 
        grid point where lsmask == 1, the stored forecast sample closest
        in value to today's forecast is located, and the bias correction 
        is the mean stored forecast minus the mean stored analysis over a
        window of samples around that closest match.
        
        npts : not referenced in this routine.
        nlats, nlons : grid dimensions looped over.
        forecast_3d : array indexed [idate,j,i].
        analyses_3d : not referenced in this routine.
        beta_3d : array indexed [idate,j,i]; updated IN PLACE where 
            lsmask == 1 (other points are left untouched) and returned.
        lsmask : 2D array; only points equal to 1 are processed.
        date_list_forecast : date strings; characters [4:6] are read as
            the month and [6:8] as the day.
        clead : forecast lead as a string, used in the file names.
        cpath_forecast : prefix (directory with trailing separator) of the
            forecast_analyzed_sorted_<Mon>_lead=<clead>.cPick files.
        cpath_era5 : kept for call compatibility only.   The ERA5
            climatology that used to be loaded from it was never used, 
            so it is no longer read.
    """
    import numpy as np
    import _pickle as cPickle
    
    cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
         
    # ---- sequentially loop through dates during the sample, computing
    #      the analog-based bias correction for each.
    
    first = True
    for idate, date in enumerate(date_list_forecast):

        cdd = date[6:8]
        cmm = date[4:6]
        if first or cdd == '01':
            
            cmonth = cmonths[int(cmm)-1]
            first = False
            
            # ---- read the sorted forecasts and associated analyzed from 
            #      file; this is redone on the first day of every month.
            #      NOTE(review): the window logic below only selects similar
            #      forecasts if the samples are sorted by forecast value at 
            #      each grid point, as the file name suggests -- confirm 
            #      against the code that writes these files.
            
            infile = cpath_forecast+'forecast_analyzed_sorted_'+cmonth+\
                '_lead='+clead+'.cPick'
            with open(infile,'rb') as inf:
                forecast_validdates_sorted = cPickle.load(inf)
                analysis_validdates_sorted = cPickle.load(inf)
            nasamps = np.shape(forecast_validdates_sorted)[0]
            
        # ---- find the closest forecast in the stored data to today's 
        #      forecast.  Form mean forecast and analyzed over the window
        #      from 21 samples before the closest match to 20 after it 
        #      (up to 42 samples, clipped at the ends of the sample).  Bias
        #      correction is then the mean(F) - mean(A)
        
        for jy in range(nlats):
            for ix in range(nlons):
                if lsmask[jy,ix] == 1:
                    f = forecast_validdates_sorted[:,jy,ix]
                    a = analysis_validdates_sorted[:,jy,ix]
                    idx = np.argmin(np.abs(f-forecast_3d[idate,jy,ix]))
                    idxmin = max(idx-21, 0)
                    idxmax = min(nasamps, idx+21)
                    beta_3d[idate,jy,ix] = np.mean(f[idxmin:idxmax]) - \
                        np.mean(a[idxmin:idxmax])
                
    return beta_3d                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       ._analyzed climatological_probability.py                                                            000664  000765  000024  00000000412 14031616205 020717  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    *    13:320                                                                                                                                                                                                                                                      analyzed climatological_probability.py                                                              000664  000765  000024  00000014030 14031616205 020503  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
analyzed_climatological_probability.py cmonth cend_hour ctype

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.stats as stats
rcParams['legend.fontsize']='medium'

# =====================================================================
    
    
# ---- inputs from command line

cmonth = sys.argv[1] # '01', '02' etc.
cend_hour = sys.argv[2] # 06, 12, 18, 00 -- end hour of 6-h period
ctype = sys.argv[3] # thinned, upscaled
imonth = int(cmonth) - 1  # zero-based month index into the month tables
nstride = 1  # not referenced again in this script

# ---- read in the forecast precipitation thresholds (netCDF variable
#      'thresholdv') from a forecast probability file.
#
# NOTE(review): `cyyyy` is never assigned anywhere in this script, so the
# `infile = ...` statement below raises NameError as written.  Presumably
# it was meant to be a four-digit year string -- confirm against the file
# names under forecast_directory before running.

forecast_directory = '/Volumes/NBM/conus_gefsv12/'+ctype+'/'
infile = forecast_directory + cmonth + cyyyy + \
     '_lead018_probabilities_'+ctype+'.nc'
print ('reading probability thresholds from ', infile)
nc = Dataset(infile)
thresholds = nc.variables['thresholdv'][:]
print ('thresholds = ', thresholds)
nthresh = len(thresholds)  # number of exceedance thresholds to process
nc.close()

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/NBM/conus_panal/'
ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
# NOTE(review): the leap-year table is identical to ndaysomo (Feb = 28),
# so 29 Feb is never read.  Confirm whether that is deliberate before
# changing the entry to 29.
ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']

# ---- determine the overall number of daily precipitation
#      samples across all years for this month and its two neighbours.
#
#      The read loop further down iterates range(2002,2020), i.e. 18
#      years of which 4 (2004, 2008, 2012, 2016) are leap years.  The
#      neighbouring months were previously sized for 20 years, which left
#      all-zero rows at the end of the time series; those rows were then
#      counted as valid dry samples and biased the probabilities low.

nyears = 18
nleapyears = 4

def _nsamps_for_month(imo):
    """Number of daily samples read for zero-based month index imo."""
    return nleapyears*ndaysomo_leap[imo] + (nyears-nleapyears)*ndaysomo[imo]

iearly = int(cmonths_early[imonth])-1
ilate = int(cmonths_late[imonth])-1

nsamps_mid = _nsamps_for_month(imonth)
nsamps_early = _nsamps_for_month(iearly)
nsamps_late = _nsamps_for_month(ilate)
nsamps = nsamps_mid + nsamps_early + nsamps_late

# ---- read in the previously generated netCDF files with analyzed
#      precipitation for this month as well as the surrounding
#      two months.  All dates have been smushed into one leading
#      index, dimension nsamps, since the date within the month
#      is irrelevant for the climatological probability.

ktr = 0
for iyear in range(2002,2020):
    print (iyear)
    for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
        imo = int(cmo)-1
        if iyear%4 == 0:
            ndays = ndaysomo_leap[imo]
        else:
            ndays = ndaysomo[imo]
        cyear = str(iyear)
        infile = master_directory + cyear + cmo + \
            '_ccpa_on_ndfd_grid_6hourly_'+ctype+'.nc'

        # the context manager closes the file even if a read fails
        with Dataset(infile) as nc:
            for iday in range(1,ndays+1):
                # The record index used to be the undefined name `idx`
                # (NameError).  iday-1 matches the newer copy of this script.
                # NOTE(review): cend_hour is not used to pick the record --
                # confirm the time layout of these files.
                precip_in = np.squeeze(nc.variables['apcp_anal'][iday-1,:,:])
                if ktr == 0:
                    # very first record: size the work arrays from the data
                    nyin, nxin = np.shape(precip_in)
                    precip_tseries = np.zeros((nsamps,nyin,nxin), \
                        dtype=np.float64)
                    missingv = -99.99*np.ones((nyin, nxin), dtype=np.float64)
                    lons = nc.variables['lons'][:,:]
                    lats = nc.variables['lats'][:,:]
                # values of 500 or more are replaced by the -99.99 flag
                precip_in = np.where(precip_in < 500., precip_in, missingv)
                precip_tseries[ktr,:,:] = precip_in[:,:]
                ktr = ktr+1


# --- set up for making plots

colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']
clevs = [0,0.05,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,1.0]
m = Basemap(llcrnrlon=233.7234,llcrnrlat=19.229,\
    urcrnrlon = 300.95782, urcrnrlat = 54.37,\
    projection='lcc',lat_1=25.,lat_2=25.,lon_0=265.,\
    resolution ='l',area_thresh=1000.)
x, y = m(lons, lats)

# --- loop over thresholds, calculate probability, and plot

climatological_probability = np.zeros((nthresh, nyin, nxin), dtype=np.float64)

for ithresh, thresh in enumerate(thresholds):

    # --- calculate probability as the fraction of all nsamps samples
    #     at or above the threshold.  Counting the boolean mask directly
    #     replaces the old `if a[0] != -1:` test (an ambiguous truth test
    #     on an array) and the removed np.float alias.
    #     NOTE(review): samples flagged missing (-99.99) still count in
    #     the denominator here, as non-exceedances.

    climatological_probability[ithresh,:,:] = \
        np.sum(precip_tseries >= thresh, axis=0) / float(nsamps)

    # --- make plots of probability.

    fig = plt.figure(figsize=(8.,6.5))
    axloc = [0.02,0.1,0.96,0.81]
    ax1 = fig.add_axes(axloc)
    title = cmonths[imonth]+' CCPA/MSWEP probability of exceeding '+\
        str(thresh)+' mm, 6 hour period ending '+cend_hour+' UTC'
    ax1.set_title(title, fontsize=13,color='Black')
    CS2 = m.contourf(x, y, climatological_probability[ithresh,:,:], clevs,\
        cmap=None, colors=colorst, extend='both')

    m.drawcoastlines(linewidth=0.8,color='Gray')
    m.drawcountries(linewidth=0.8,color='Gray')
    m.drawstates(linewidth=0.8,color='Gray')

    # ---- add a horizontal colorbar in its own axes below the map.

    cax = fig.add_axes([0.06,0.07,0.88,0.02])
    cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
        drawedges=True,ticks=clevs,format='%g')
    cb.ax.tick_params(labelsize=7)
    cb.set_label('Probability',fontsize=9)

    # ---- save the plot, then release the figure so that figures do
    #      not pile up in memory across thresholds

    plot_title = 'ccpa_probability_'+str(thresh)+\
        'mm_'+cend_hour+'UTC.png'
    fig.savefig(plot_title, dpi=300)
    print ('saving plot to file = ',plot_title)
    plt.close(fig)

    
# ---- save array to cPickle file
#
# NOTE(review): cyear and cmo are leftovers from the read loop (the last
# year and the *following* month), and the pickle is given a '.nc'
# extension.  The name is kept unchanged here; confirm what downstream
# readers expect before renaming.

outfile = master_directory + cyear + cmo + \
    '_ccpa_on_ndfd_grid_6hourly_climo_probability_'+ctype+'.nc'
print ('writing to ', outfile)
# the with-block closes the file even if one of the dumps fails
with open(outfile, 'wb') as ouf:
    cPickle.dump(climatological_probability, ouf)
    cPickle.dump(lons, ouf)
    cPickle.dump(lats, ouf)

print ('Done!')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        ._analyzed_climatological_probability.py                                                            000664  000765  000024  00000000471 14032703332 021022  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       9                                      ATTR      9     !                      com.apple.lastuseddate#PS      (     com.macromates.bookmarks   0     com.macromates.selectionRange      5     com.macromates.visibleIndex  `    *    ( '87' )83:621451                                                                                                                                                                                                       analyzed_climatological_probability.py                                                              000664  000765  000024  00000015215 14032703332 020607  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        """
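analyzed_climatological_probability.py cmonth cend_hour ctype

    cmonth    : two-digit month, '01' .. '12'
    cend_hour : end hour of the 6-h accumulation period, '00', '06', '12', '18'
    ctype     : 'thinned' or 'upscaled'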

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.stats as stats
rcParams['legend.fontsize']='medium'

# =====================================================================
    
    
# ---- inputs from command line

cmonth = sys.argv[1] # '01', '02' etc.
cend_hour = sys.argv[2] # 06, 12, 18, 00 -- end hour of 6-h period
ctype = sys.argv[3] # thinned, upscaled
imonth = int(cmonth) - 1
nstride = 1
plotit = False

# ---- set parameters

pflag = False # for print statements
master_directory = '/Volumes/NBM/conus_panal/'
ndaysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
# NOTE(review): the leap-year table is identical to ndaysomo (Feb = 28),
# so 29 Feb is never read.  Confirm whether that is deliberate before
# changing the entry to 29.
ndaysomo_leap = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
cmonths_early = ['12','01','02','03','04','05','06','07','08','09','10','11']
cmonths_late =  ['02','03','04','05','06','07','08','09','10','11','12','01']
thresholds = [0.254, 1.0, 5.0, 10.0, 25.0]
nthresh = len(thresholds)

# ---- determine the overall number of daily precipitation
#      samples across all years for this month and its two neighbours.
#
#      The read loop further down iterates range(2002,2020), i.e. 18
#      years of which 4 (2004, 2008, 2012, 2016) are leap years.  The
#      neighbouring months were previously sized for 20 years, which left
#      all-zero rows at the end of the time series; those rows pass the
#      `>= 0.0` validity test and biased the probabilities low.

nyears = 18
nleapyears = 4

def _nsamps_for_month(imo):
    """Number of daily samples read for zero-based month index imo."""
    return nleapyears*ndaysomo_leap[imo] + (nyears-nleapyears)*ndaysomo[imo]

iearly = int(cmonths_early[imonth])-1
ilate = int(cmonths_late[imonth])-1

nsamps_mid = _nsamps_for_month(imonth)
nsamps_early = _nsamps_for_month(iearly)
nsamps_late = _nsamps_for_month(ilate)
nsamps = nsamps_mid + nsamps_early + nsamps_late

# ---- gather the analyzed precipitation from the previously generated
#      monthly netCDF files for this month and the two surrounding
#      months, every year 2002-2019.  Every record is appended along
#      one leading sample axis (dimension nsamps); the date within the
#      month does not matter for the climatological probability.

ktr = 0
for iyear in range(2002,2020):
    cyear = str(iyear)
    use_leap_table = (iyear%4 == 0)
    for cmo in [cmonth, cmonths_early[imonth], cmonths_late[imonth]]:
        imo = int(cmo)-1
        ndays = ndaysomo_leap[imo] if use_leap_table else ndaysomo[imo]
        infile = master_directory + cyear + cmo + \
            '_ccpa_on_ndfd_grid_6hourly_'+ctype+'.nc'
        print (infile)
        nc = Dataset(infile)

        for irec in range(ndays):
            precip_in = np.squeeze(nc.variables['apcp_anal'][irec,:,:])
            if ktr == 0:
                # first record of all: size the work arrays from the data
                # and pick up the grid coordinates
                nyin, nxin = np.shape(precip_in)
                print ('nyin, nxin = ', nyin, nxin)
                precip_tseries = np.zeros((nsamps,nyin,nxin), \
                    dtype=np.float64)
                missingv = -99.99*np.ones((nyin, nxin), dtype=np.float64)
                lons = nc.variables['lons'][:,:]
                lats = nc.variables['lats'][:,:]
                ones = np.ones((nyin, nxin), dtype=np.float64)
                zeros = np.zeros((nyin, nxin), dtype=np.float64)
            # values of 500 or more are replaced by the -99.99 flag
            precip_tseries[ktr,:,:] = \
                np.where(precip_in < 500., precip_in, missingv)
            ktr += 1
        nc.close()


#print ('max, min precip_tseries = ', np.max(precip_tseries), np.min(precip_tseries))
#print ('precip_tseries[0:-1:20,nyin//2, nxin//2] = ', precip_tseries[0:-1:20,nyin//2, nxin//2])

# --- set up for making plots (the map projection is only built when
#     plotting is switched on)

colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']
clevs = [0,0.01, 0.03, 0.05,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8 ]
if plotit:
    m = Basemap(llcrnrlon=233.7234,llcrnrlat=19.229,\
        urcrnrlon = 300.95782, urcrnrlat = 54.37,\
        projection='lcc',lat_1=25.,lat_2=25.,lon_0=265.,\
        resolution ='l',area_thresh=1000.)
    x, y = m(lons, lats)

# --- loop over thresholds, calculate probability, and plot

climatological_probability = np.zeros((nthresh, nyin, nxin), dtype=np.float64)

# Number of non-missing samples at each grid point (missing values are
# flagged negative).  This does not depend on the threshold, so it is
# computed once.  Counting the boolean masks directly also avoids two
# full-size (nsamps,nyin,nxin) int32 scratch arrays.
valid_count = np.sum(precip_tseries >= 0.0, axis=0)
has_data = valid_count > 0
# denominator with 0 replaced by 1 so the division never sees 0/0
safe_count = np.where(has_data, valid_count, 1).astype(float)

for ithresh, thresh in enumerate(thresholds):

    # --- calculate probability = exceedances / valid samples, with
    #     -99.99 wherever a grid point has no valid sample at all

    exceed_count = np.sum(precip_tseries >= thresh, axis=0)
    climatological_probability[ithresh,:,:] = \
        np.where(has_data, exceed_count / safe_count, -99.99)

    if plotit:

        # --- make plots of probability.

        fig = plt.figure(figsize=(8.,6.5))
        axloc = [0.02,0.1,0.96,0.81]
        ax1 = fig.add_axes(axloc)
        title = cmonths[imonth]+' CCPA/MSWEP probability of exceeding '+\
            str(thresh)+' mm, 6 hour period ending '+cend_hour+' UTC'
        ax1.set_title(title, fontsize=13,color='Black')
        CS2 = m.contourf(x, y, climatological_probability[ithresh,:,:], clevs,\
            cmap=None, colors=colorst, extend='both')

        m.drawcoastlines(linewidth=0.8,color='Gray')
        m.drawcountries(linewidth=0.8,color='Gray')
        m.drawstates(linewidth=0.8,color='Gray')

        # ---- add a horizontal colorbar in its own axes below the map.

        cax = fig.add_axes([0.06,0.07,0.88,0.02])
        cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
            drawedges=True,ticks=clevs,format='%g')
        cb.ax.tick_params(labelsize=7)
        cb.set_label('Probability',fontsize=9)

        # ---- save the plot, then release the figure so that figures
        #      do not pile up in memory across thresholds

        plot_title = 'ccpa_probability_'+str(thresh)+\
            'mm_'+cend_hour+'UTC.png'
        fig.savefig(plot_title, dpi=300)
        print ('saving plot to file = ',plot_title)
        plt.close(fig)

    
# ---- save array to cPickle file
#
#      The file is labelled with the requested month, cmonth.  It used
#      to be labelled with `cmo`, a leftover of the read loop that holds
#      the *following* month once the loop has finished, so e.g. the
#      January climatology was written under '02'.
#      NOTE(review): any reader that compensated for the old naming
#      needs to be updated together with this change.

outfile = master_directory + cmonth + \
    '_ccpa_on_ndfd_grid_6hourly_climo_probability_'+ctype+'.cPick'
print ('writing to ', outfile)
# the with-block closes the file even if one of the dumps fails
with open(outfile, 'wb') as ouf:
    cPickle.dump(climatological_probability, ouf)
    cPickle.dump(lons, ouf)
    cPickle.dump(lats, ouf)

print ('Done!')                                                                                                                                                                                                                                                                                                                                                                                   ._avg_wordlength.py                                                                                 000775  000765  000024  00000000260 14016263133 014563  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    d+                                                                                                                                                                                                                                                                                                                                                    avg_wordlength.py                                                                                   000775  000765  000024  00000001301 14016263133 014343  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         #  avg_word_length
#    given a sentence, calculate the average word length

import string  # include string library for the split function.

def main():
    """Prompt for a sentence and print the average length of its words.

    Words are separated by whitespace.  One trailing punctuation mark
    (any of . ; : ! ? ,) is not counted as a letter.  If the sentence
    contains no words, a message is printed instead of dividing by zero.
    """
    # raw_input only exists on Python 2; on Python 3 the equivalent is input
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print("Given input sentence, determine avg. word length")
    textin = read_line("Enter sentence : ")
    words = textin.split()
    if not words:
        print("No words entered; average word length is undefined")
        return

    lettersum = 0.
    for word in words:
        numltrs = len(word)
        if word[-1] in ".;:!?,":
            numltrs = numltrs-1
        lettersum = lettersum + numltrs
    print("The average word length is  %s" % (lettersum/len(words)))
main()

                                                                                                                                                                                                                                                                                                                               ._begin_end_stream_soilq.py                                                                         000775  000765  000024  00000000420 14016263133 016243  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    ,    21-25:211281                                                                                                                                                                                                                                                begin_end_stream_soilq.py                                                                           000775  000765  000024  00000005565 14016263133 016045  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         ### begin_end_stream_soilq.py ###

from netCDF4 import Dataset
import numpy as np
from dateutils import daterange
import sys
import os
import os.path
from os import path
import numpy.ma as ma
import _pickle as cPickle

def find_nearest(vec, value):
    """Return the index of the element of vec closest to value.

    The first index is returned when several elements are equally close.
    """
    distance = np.abs(vec-value)
    return distance.argmin()

# ---- command line inputs: low/high longitude and latitude of the box,
#      then a title string that is used in the output file name

cstreams = ['1999','2003','2007','2011','2015']

clonlow, clatlow = sys.argv[1], sys.argv[2]
clonhi, clathi = sys.argv[3], sys.argv[4]
ctitle = sys.argv[5]
rlonlow, rlatlow = float(clonlow), float(clatlow)
rlonhi, rlathi = float(clonhi), float(clathi)


# --- read the lon and lat coordinate vectors from a sample file

infile = '/Users/Tom/python/gefsv12/1999/bfg_2003123100_fhr00_control2.nc4'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()
nlons, nlats = len(lon), len(lat)

# ---- determine the nearest grid index for box boundaries

imin, imax = find_nearest(lon, rlonlow), find_nearest(lon, rlonhi)
jmin, jmax = find_nearest(lat, rlatlow), find_nearest(lat, rlathi)

# box size in grid points; the rows are sliced jmax:jmin further down
nj = jmin - jmax
ni = imax - imin

# ---- loop thru streams.  For every pair of consecutive streams, save
#      the 10-40 cm soil moisture in the box on the last day of the
#      earlier stream (31 Dec) and on the first two days of the later
#      stream (1 and 2 Jan).

def _read_soilw(infile):
    """Return the boxed 'soilw10_40cmdow' field (first record) of infile."""
    print (infile)
    # the context manager closes the file even if the read fails
    with Dataset(infile) as nc:
        return nc.variables['soilw10_40cmdow'][0,jmax:jmin,imin:imax]

nboundaries = len(cstreams) - 1   # one boundary per consecutive stream pair
sw1_save = np.zeros((nj,ni,nboundaries), dtype=np.float32)
sw2_save = np.zeros((nj,ni,nboundaries), dtype=np.float32)
sw3_save = np.zeros((nj,ni,nboundaries), dtype=np.float32)

datadir = '/Users/Tom/python/gefsv12/'

for istream, cstream in enumerate(cstreams[0:-1]):

    # ---- determine the dates.  The earlier stream's last date is 31 Dec
    #      of the year that names the next stream ('1999' -> 2003123100);
    #      the later stream then supplies 1 and 2 Jan of the year after.
    #      Deriving the dates instead of listing them per stream means a
    #      new entry in cstreams cannot silently reuse the previous dates.

    cstream2 = cstreams[istream+1]
    print ('istream, cstream, cstream2 = ', istream, cstream, cstream2 )
    cyear_next = str(int(cstream2)+1)
    date1 = cstream2 + '123100'
    date2 = cyear_next + '010100'
    date3 = cyear_next + '010200'

    infile1 = datadir+cstream+'/bfg_'+date1+'_fhr00_control2.nc4'
    infile2 = datadir+cstream2+'/bfg_'+date2+'_fhr00_control2.nc4'
    infile3 = datadir+cstream2+'/bfg_'+date3+'_fhr00_control2.nc4'

    # (the full-grid 'landsfc' field used to be read here as well, but
    # it was never used)
    sw1_save[:,:,istream] = _read_soilw(infile1)
    sw2_save[:,:,istream] = _read_soilw(infile2)
    sw3_save[:,:,istream] = _read_soilw(infile3)

# ---- save the three soil-moisture arrays to one pickle file, in the
#      order sw1_save, sw2_save, sw3_save.

outfile = 'gefsv12/'+ctitle+'_streamboundary_soilq.dump'
print ('writing to ', outfile)
# the with-block closes the file even if one of the dumps fails
with open(outfile,'wb') as ouf:
    cPickle.dump(sw1_save, ouf)
    cPickle.dump(sw2_save, ouf)
    cPickle.dump(sw3_save, ouf)

                                                                                                                                           ._bias_corr_conusKF.py                                                                              000775  000765  000024  00000000415 14016263133 015146  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    F-    51:911750                                                                                                                                                                                                                                                   bias_corr_conusKF.py                                                                                000775  000765  000024  00000006451 14016263133 014737  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
bias_corr_conusKF.py &
using ECMWF forecast and ERA5 verification data, implement Kalman-filter
type bias correction approach to estimate forecast bias.   Grid covers
CONUS, approximately.  Coded by Tom Hamill, May-Jun 2020.
tom.hamill@noaa.gov
"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import _pickle as cPickle

# =====================================================================

def decayavg_bias_simultaneousKF(Kalman_gain_4D, nlats, nlons, \
    obs, fcst, bias_estimate):
    """
    Update the bias estimate with a Kalman-filter step that uses the
    forecast and observation at every grid point simultaneously.

    For each grid point (j,i):
        bias[j,i] <- bias[j,i] - sum( K[j,i,:,:] * innovation[:,:] )
    with innovation = obs - (fcst - bias), evaluated with the bias
    estimate as it was on entry.

    Kalman_gain_4D : gain, indexed [j, i, :, :]
    nlats, nlons   : number of grid rows / columns to update
    obs, fcst      : 2-D observed (analyzed) and forecast fields
    bias_estimate  : 2-D prior bias estimate; updated in place

    Returns bias_estimate (the same array object that was passed in).
    """

    # The innovation is computed once, from the prior bias estimate.
    # The arithmetic creates a new array, so the in-place updates below
    # do not feed back into it.  Previously it was recomputed inside the
    # loop from the partly updated bias_estimate, which made the result
    # depend on the loop order, and it read the global `forecast`
    # instead of the `fcst` argument.
    innovation = obs[:,:] - (fcst[:,:] - bias_estimate[:,:])

    for i in range(nlons):
        for j in range(nlats):
            bias_estimate[j,i] = bias_estimate[j,i] - \
                np.sum(Kalman_gain_4D[j,i,:,:]*innovation)

    return bias_estimate

# =====================================================================

clead = sys.argv[1]  # lead time, e.g., 12, 72, 120 (in hours)

# --- read in the Kalman gain generated by invert_localized_covs_2019.py

infile = 'Kalman_gain_4D_lead='+clead+'.cPick'
print (infile)
# the with-block closes the file even if unpickling fails
with open(infile, 'rb') as inf:
    Kalman_gain_4D = cPickle.load(inf)

# --- other initialization stuff

ilead = int(clead)
datadir = '/Users/Tom/python/ecmwf/'
cvariable = '2t'

# Writing of bias estimates is switched on by the date loop once the
# forecast initial date reaches 2019.  This flag used to start as True,
# which made that switch a no-op and also wrote the 2018 spin-up dates.
tally_statistics = False

dateend = dateshift('2019123100',-ilead)
date_list_anal = daterange('2018110100',dateend,24) # initial times of the forecasts
ndates = len(date_list_anal)
# time each forecast is valid: initial time shifted by the lead
date_list_fcst = [dateshift(datea, ilead) for datea in date_list_anal]

# ---- loop over dates and update bias estimates

for idate, datea in enumerate(date_list_anal):

    datef = date_list_fcst[idate]
    print ('------ processing analysis, forecast dates = ', datea, datef)
    # start writing output once the initial dates reach 2019
    if int(datea) >= 2019010100: tally_statistics = True

    # ---- read the ECMWF ERA5 reanalysis at the forecast date/time.
    #      lats and lons follow the analysis in the same pickle file and
    #      are only needed once, to size the bias array.

    infile = datadir + 't2m_era5_halfdegree_'+datef+'.cPick'
    with open(infile, 'rb') as inf:
        analysis = cPickle.load(inf)
        if idate == 0:
            lats = cPickle.load(inf)
            lons = cPickle.load(inf)
            nlats, nlons = np.shape(lats)
            bias_estimate = np.zeros((nlats, nlons), dtype=np.float64)

    # ---- read the ECMWF control forecast at this lead time and initial
    #      date (first message in the grib file)

    infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'
    grbfile = pygrib.open(infile)
    try:
        forecast = grbfile.select()[0].values
    finally:
        grbfile.close()  # release the grib file even if the read fails

    # ---- produce estimate of Kalman filter bias correction

    bias_estimate = decayavg_bias_simultaneousKF(Kalman_gain_4D, \
        nlats, nlons, analysis, forecast, bias_estimate)

    # ---- write bias estimates to file once writing is switched on.

    if tally_statistics:
        outfilename = datadir + 'bias_est_conusKF'+datef+'_f'+clead+'.cPick'
        with open(outfilename, 'wb') as ouf:
            cPickle.dump(bias_estimate, ouf)
                                                                                                                                                                                                                       ._bias_corr_decayavg.py                                                                             000775  000765  000024  00000000415 14016263133 015361  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    3.    82:801613                                                                                                                                                                                                                                                   bias_corr_decayavg.py                                                                               000775  000765  000024  00000006125 14016263133 015150  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
using control forecast and ERA5 verification data, implement the standard
decaying-average bias correction approach to estimate forecast bias.   Grid covers
CONUS, approximately.  Coded by Tom Hamill, May-Jun 2020.
tom.hamill@noaa.gov
"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import numpy.ma as ma
from os import path
import _pickle as cPickle
    
# =====================================================================

def decayavg_bias(alpha, obs, forecast, bias_decayavg):
    """
    Return the updated decaying-average bias estimate: the previous
    estimate weighted by (1-alpha) plus the newest forecast-minus-obs
    error weighted by alpha.  All three fields are indexed as 2-D
    arrays; the inputs are not modified.
    """
    newest_error = forecast[:,:] - obs[:,:]
    carried_over = (1-alpha)*bias_decayavg[:,:]
    return carried_over + alpha*newest_error

# =====================================================================

# ---- command line inputs: the lead time in hours (e.g., 12, 72, 120)
#      and alpha, the weight given to the newest error in the decaying
#      average

clead, calpha = sys.argv[1], sys.argv[2]

# --- other initialization stuff

alpha = float(calpha)
ilead = int(clead)
datadir_reanl = '/Users/Tom/python/ecmwf/'
datadir = '/Users/Tom/python/ncep/'
cvariable = '2t'
tally_statistics = False   # switched on in the date loop

# forecast initial dates, one every 24 h, and for each of them the
# date shifted forward by the lead time
dateend = dateshift('2019123100',-ilead)
date_list_anal = daterange('2018110100',dateend,24)
ndates = len(date_list_anal)
date_list_fcst = [dateshift(datea, ilead) for datea in date_list_anal]

# ---- loop over dates and update bias estimates

for idate, datea in enumerate(date_list_anal):

    datef = date_list_fcst[idate]
    # start writing output once the initial dates reach 2019
    if int(datea) >= 2019010100: tally_statistics = True

    # ---- read the ERA5 reanalysis valid at the forecast date.  lats and
    #      lons follow the analysis in the same pickle file and are only
    #      needed once, to size the bias array.

    infile = datadir_reanl + 't2m_era5_halfdegree_'+datef+'.cPick'
    with open(infile, 'rb') as inf:
        analysis = cPickle.load(inf)
        if idate == 0:
            lats = cPickle.load(inf)
            lons = cPickle.load(inf)
            nlats, nlons = np.shape(lats)
            bias_decayavg = ma.zeros((nlats, nlons), dtype=np.float32)

    # ---- a missing forecast file skips the update; the previous bias
    #      estimate is carried forward unchanged

    infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'
    if path.exists(infile):

        # ---- read the control forecast at this lead time and initial
        #      date (first message in the grib file)

        grbfile = pygrib.open(infile)
        try:
            forecast = grbfile.select()[0].values
        finally:
            grbfile.close()  # release the grib file even if the read fails

        # ---- produce estimate of standard decaying-average bias correction

        bias_decayavg = decayavg_bias(alpha, analysis, forecast, bias_decayavg)

    # ---- write bias estimates to file once writing is switched on.

    if tally_statistics:
        outfilename = datadir + 'bias_decayavg_alpha'+calpha+'_'+datef+'_f'+clead+'.cPick'
        print ('       writing bias estimates to ', outfilename)
        with open(outfilename, 'wb') as ouf:
            cPickle.dump(bias_decayavg, ouf)
                                                                                                                                                                                                                                                                                                                                                                                                                                           ._bias_linregr_decayavg.py                                                                          000775  000765  000024  00000000415 14016263142 016056  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `     0    111:23508                                                                                                                                                                                                                                                   bias_linregr_decayavg.py                                                                            000775  000765  000024  00000020423 14016263142 015642  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
using ECMWF forecast and ERA5 verification data, implement Kalman-filter
type regression bias correction approach to estimate forecast bias.   Grid covers
CONUS, approximately.  Coded by Tom Hamill, May-Jun 2020.
tom.hamill@noaa.gov
"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
import statsmodels.api as sm
from numba import jit
from os import path
    
# =====================================================================

#def Kalman_filter_regression(R, sigma2_intercept, sigma2_slope, \
#    regr_slope, regr_intercept, analysis, forecast):   
#
#    # --- Kalman-filter update our estimates of the regression coefficients with the most
#    #     recent forecast and analysis deviations from climatology
#
#    regr_corr_forecast = regr_intercept + regr_slope*forecast
#    Kalman_gain_intercept = sigma2_intercept / \
#        (sigma2_intercept + sigma2_slope*regr_corr_forecast + R )   # **** regression corrected or not?
#    Kalman_gain_slope = sigma2_slope*forecast / \
#        (sigma2_intercept + sigma2_slope*regr_corr_forecast + R )
#    regr_slope = regr_slope +  Kalman_gain_slope*(analysis-regr_corr_forecast)
#    regr_intercept = regr_intercept +  Kalman_gain_intercept*(analysis-regr_corr_forecast)
#    return regr_slope, regr_intercept


@jit
def Kalman_filter_regression(R, var_const, var_slope, cov_const_slope, \
    regr_slope, regr_intercept, analysis, forecast):   
    """
    Kalman-filter update of the regression intercept and slope at every
    grid point, using the most recent forecast and analysis deviations
    from climatology.

    Parameters
    ----------
    R : scalar added to H P H^T in the gain denominator.
    var_const, var_slope, cov_const_slope : 2-D (ny, nx) arrays holding the
        elements of the 2x2 coefficient covariance matrix at each grid point
        ([0,0], [1,1], and the off-diagonal, respectively).
    regr_slope, regr_intercept : 2-D (ny, nx) arrays of the current
        coefficients.  They are UPDATED IN PLACE and also returned.
    analysis, forecast : 2-D (ny, nx) arrays.

    Returns
    -------
    (regr_slope, regr_intercept) : the updated coefficient arrays.

    Diagnostics are printed for grid point [27,13] when the grid
    contains that point.
    """

    # --- regression-corrected forecast from the PRIOR coefficients; it is
    #     computed once, before any coefficient is modified.

    regr_corr_forecast = regr_intercept + regr_slope*forecast
    ny, nx = np.shape(var_const)

    # --- 2x2 covariance work array, refilled at each grid point.

    P_beta = np.zeros((2, 2), dtype=np.float32)
    for ix in range(nx):
        for jy in range(ny):
            corrected = regr_corr_forecast[jy,ix]

            # NOTE(review): the second element of H is the regression-
            # corrected forecast rather than the raw forecast; the older
            # commented-out version of this routine carried the remark
            # "regression corrected or not?".  Left unchanged -- confirm.
            H = np.array([1.0, corrected])
            P_beta[0,0] = var_const[jy,ix]
            P_beta[1,1] = var_slope[jy,ix]
            P_beta[1,0] = cov_const_slope[jy,ix]
            P_beta[0,1] = P_beta[1,0]

            # --- Kalman gain  K = P H^T / (H P H^T + R).  H is 1-D, so
            #     no explicit transpose is needed for matmul.

            P_beta_HT = np.matmul(P_beta, H)
            H_Pbeta_HT = np.matmul(H, P_beta_HT)
            H_Pbeta_HT_plusR_inv = 1.0 / (H_Pbeta_HT + R)
            K_beta = P_beta_HT * H_Pbeta_HT_plusR_inv

            # --- innovation: analysis minus regression-corrected forecast

            innovation = analysis[jy,ix] - corrected
            if jy == 27 and ix == 13:
                print ('prior intercept, slope = ', regr_intercept[jy,ix] , regr_slope[jy,ix] )
            regr_intercept[jy,ix] = regr_intercept[jy,ix] + K_beta[0]*innovation
            regr_slope[jy,ix] = regr_slope[jy,ix] + K_beta[1]*innovation
            if jy == 27 and ix == 13:
                print ('K_beta = ', K_beta)
                print ('posterior intercept, slope = ', regr_intercept[jy,ix] , regr_slope[jy,ix] )
                print ('analysis[27,13], regr_corr_forecast[jy,ix], forecast[jy,ix] = ', \
                    analysis[27,13], regr_corr_forecast[jy,ix], forecast[jy,ix])
            
    return regr_slope, regr_intercept

# =====================================================================

# ---- run configuration, taken from the command line and hard-coded paths

clead = sys.argv[1]  # lead time, e.g., 12, 72, 120 (in hours)
ilead = int(clead)
ndstart = ilead // 24  # lead time expressed in whole days
R = 1.0  # passed to Kalman_filter_regression as its R argument
datadir_reanl = '/Users/Tom/python/ecmwf/'
datadir = '/Users/Tom/python/ecmwf/'
cvariable = '2t'
tally_statistics = False  # switched on in the date loop once dates reach 2019
#datestart = dateshift('2018110100',ilead)
dateend = dateshift('2019123100',-ilead)
date_list_anal = daterange('2018110100',dateend,24) # initial time of the current forecast
ndates = len(date_list_anal)

# each initial date shifted forward by the lead time; these dates are used
# below to name the verifying reanalysis files.
date_list_fcst = [dateshift(date_init, ilead) for date_init in date_list_anal]

# ---- read in the time-dependent ERA5 climatology of t2m

infilename = 'ecmwf/t2m_climo_daily_era5_halfdegree.cPick'
print (infilename)
with open(infilename, 'rb') as inf:  # closed even if unpickling fails
    climo_yearly = cPickle.load(inf)
    print ('shape climo_yearly = ', np.shape(climo_yearly))
    latsa_halfdegree = cPickle.load(inf)
    lonsa_halfdegree = cPickle.load(inf)
nlats, nlons = np.shape(latsa_halfdegree)

# ---- read the regression-coefficient covariance statistics for this lead
#      time (three pickled objects, in this order).

infile = 'covariance_regression_coefficients_lead='+clead+'.cPick'
with open(infile, 'rb') as inf:
    var_const = cPickle.load(inf)
    var_slope = cPickle.load(inf)
    cov_const_slope = cPickle.load(inf)

#var_const = var_const*2.0
#var_slope = var_slope*2.0
#cov_const_slope[:,:] = 0.0

mfact = 1.
#var_const = var_const * mfact
#var_slope = var_slope * mfact
#cov_const_slope = cov_const_slope*mfact

# ---- initial regression coefficients: intercept 0 and slope 1.

regr_intercept = np.zeros((nlats, nlons), dtype=np.float32)
regr_slope = np.ones((nlats, nlons), dtype=np.float32)
bias_estimate = np.zeros((nlats, nlons), dtype=np.float32)

#mfact = 4.0
#if clead == '24':
#    #sigma2_intercept = mfact*0.0054
#    #sigma2_slope = mfact*0.00027
#    sigma2_intercept = mfact*7.77
#    sigma2_slope = mfact*9.0316564e-05
#elif clead == '48':
#    sigma2_intercept = mfact*0.0073
#    sigma2_slope = mfact*0.00034
#elif clead == '72':
#    sigma2_intercept = mfact*0.0093
#    sigma2_slope = mfact*0.00043
#elif clead == '96':
#    sigma2_intercept = mfact*0.0122
#    sigma2_slope = mfact*0.00054

# ---- loop over dates and update bias estimates

# For every date in date_list_anal: read the ERA5 analysis file named with
# the matching date_list_fcst entry, read the control forecast GRIB file
# named with the initial date, convert both to deviations from climatology,
# update the regression coefficients, and (once dates reach 2019) pickle
# the current slope and intercept.
for idate, datea in enumerate(date_list_anal):
    
    # ---- determine the julian day of the year
    
    yyyy,mm,dd,hh = splitdate(datea)
    # NOTE(review): doy is derived from datea, yet it indexes the climatology
    # subtracted from the analysis read for datef as well -- confirm intended.
    doy = dayofyear(yyyy,mm,dd)
    datef = date_list_fcst[idate]
    print ('------ processing analysis, forecast dates = ', datea, datef)
    # once switched on, tally_statistics stays True for all later dates
    if int(datea) >= 2019010100: tally_statistics = True

    # ---- read the ECMWF ERA5 reanalysis at valid at the forecast date.
    #      Convert this to deviation from climatology
    
    infile = datadir_reanl + 't2m_era5_halfdegree_'+datef+'.cPick'
    #print (infile)
    inf = open(infile, 'rb')
    analysis = cPickle.load(inf)
    if idate == 0:
        # lats/lons are only unpickled from the first file; they reset
        # nlats, nlons.
        lats = cPickle.load(inf)
        lons = cPickle.load(inf)
        nlats, nlons = np.shape(lats)
        print (nlats, nlons)
        # allocated here but not written to anywhere in this loop
        analyzed_yearly = np.zeros((ndates,nlats,nlons), dtype=np.float32) 
        forecast_yearly = np.zeros((ndates,nlats,nlons), dtype=np.float32)    
    inf.close()
    analysis[:,:] = analysis[:,:] - climo_yearly[doy,:,:]
    #print ('max, min analysis = ', np.max(analysis), np.min(analysis))
    
    # ---- read the control forecast at this lead time and initial date.
    #      Convert this to deviation from climatology
 
    infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'   
    fexist = path.exists(infile)
    #print (infile, fexist)
    # when the forecast file is missing the coefficients are simply left
    # unchanged for this date.
    if fexist:
        grbfile = pygrib.open(infile) 
        grb = grbfile.select()[0]   # first message in the file
        forecast = grb.values
        grbfile.close()
        forecast[:,:] = forecast[:,:] - climo_yearly[doy,:,:]
        #print ('max, min forecast = ', np.max(forecast), np.min(forecast))
        #print ('max, min F-A = ', np.max(forecast-analysis), np.min(forecast-analysis))
    
        # ---- update the estimates of the regression coefficients with Kalman-filter
        #      type machinery.
    
        #regr_slope, regr_intercept = Kalman_filter_regression(R, \
        #    sigma2_intercept, sigma2_slope, \
        #    regr_slope, regr_intercept, analysis, forecast)
        
        regr_slope, regr_intercept = Kalman_filter_regression(R, \
            var_const, var_slope, cov_const_slope, \
            regr_slope, regr_intercept, analysis, forecast) 
        #print ('       max, min regr_slope = ', ma.max(regr_slope), ma.min(regr_slope)) 
        #print ('       max, min regr_intercept = ', ma.max(regr_intercept), ma.min(regr_intercept))  
        #print ('       mean slope, intercept = ', np.mean(regr_slope), np.mean(regr_intercept))
        #print ('       std slope, intercept = ', np.std(regr_slope), np.std(regr_intercept))
        #print ('       mean O-F  = ', np.mean(analysis)-np.mean(forecast))  
        #ind = np.unravel_index(np.argmin(regr_intercept, axis=None), regr_intercept.shape)
    
    # ---- write the current regression slope and intercept to file if in 2019.
    #      This happens whether or not a forecast file was found for this date.

    if tally_statistics == True:
        outfilename = datadir + 'bias_KFregression_'+datef+'_f'+clead+'.cPick'
        #print ('       writing bias estimates to ', outfilename)

        # two objects are pickled per file: slope first, then intercept
        ouf = open(outfilename, 'wb')
        cPickle.dump(regr_slope, ouf)
        cPickle.dump(regr_intercept, ouf)
        ouf.close()                                                                                                                                                                                                                                             ._bias_linregr_getcovstats.py                                                                       000775  000765  000024  00000000414 14016263142 016640  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    Ż0    48:35672                                                                                                                                                                                                                                                    bias_linregr_getcovstats.py                                                                         000775  000765  000024  00000013054 14016263142 016427  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
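using ECMWF forecast and ERA5 verification data, fit a linear regression of
analyzed on forecast deviations from climatology at each grid point, and save
the variances and covariance of the regression coefficients (intercept, slope)
to file for the Kalman-filter type regression bias correction.   Grid covers
CONUS, approximately.  Coded by Tom Hamill, May-Jun 2020.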
tom.hamill@noaa.gov
"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from os import path
import statsmodels.api as sm
    
# =====================================================================

def KF_linear_regres(Bbeta, B, R, KF_betahat, \
    obs, fcst, bias_estimate):
    """
    Kalman-filter update of the bias-regression coefficients and the
    resulting bias estimate.

    Parameters
    ----------
    Bbeta : 2-D array, multiplied by the predictor vector L.  L has a
        single element here, so Bbeta must be 1x1.
    B, R : added to L Bbeta L^T in the gain denominator
        (presumably scalars -- TODO confirm).
    KF_betahat : array indexed [coefficient, y, x]; its first len(L)
        slices are UPDATED IN PLACE.
    obs, fcst, bias_estimate : 2-D arrays on the same grid.

    Returns
    -------
    (KF_betahat, bias_estimate) : updated coefficients, and the new bias
        estimate sum_i L[i]*KF_betahat[i,:,:].
    """

    # ---- estimate the Kalman gain for the bias correction.

    L = np.array([1.0, ])
    nbeta = len(L)
    BbetaLT = np.matmul(Bbeta[:,:], np.transpose(L))
    LBbetaLT = np.matmul(L,BbetaLT)
    LBbetaLT_plus_B_plus_R = LBbetaLT + B + R
    LBbetaLT_plus_B_plus_R_inv = 1.0 / LBbetaLT_plus_B_plus_R
    Kfgain_beta = BbetaLT * LBbetaLT_plus_B_plus_R_inv

    # ---- update bias estimate with new data.  The innovation uses the
    #      fcst argument (not a module-level variable) and the loops run
    #      over the actual number of predictors in L.

    innovation = obs[:,:] - (fcst[:,:] - bias_estimate[:,:])
    for i in range(nbeta):
        KF_betahat[i,:,:] = KF_betahat[i,:,:] - Kfgain_beta[i]*innovation
    bias_estimate = L[0]*KF_betahat[0,:,:]
    for i in range(1, nbeta):
        bias_estimate = bias_estimate + L[i]*KF_betahat[i,:,:]
    return KF_betahat, bias_estimate


# =====================================================================

# ---- run configuration, taken from the command line and hard-coded paths

clead = sys.argv[1]  # lead time, e.g., 12, 72, 120 (in hours)
ilead = int(clead)
ndstart = ilead // 24  # lead time in whole days; offsets the climatology index below
datadir_reanl = '/Users/Tom/python/ecmwf/'
datadir = '/Users/Tom/python/ecmwf/'

cvariable = '2t'
tally_statistics = False
#datestart = dateshift('2018110100',ilead)
dateend = dateshift('2018123100',-ilead)
date_list_anal = daterange('2018010100',dateend,24) # initial time of the current forecast
ndates = len(date_list_anal)

# each initial date shifted forward by the lead time; these dates are used
# below to name the verifying reanalysis files.
date_list_fcst = [dateshift(date_init, ilead) for date_init in date_list_anal]

# ---- read in the time-dependent ERA5 climatology of t2m

infilename = 'ecmwf/t2m_climo_daily_era5_halfdegree.cPick'
print (infilename)
with open(infilename, 'rb') as inf:  # closed even if unpickling fails
    climo_yearly = cPickle.load(inf)
    print ('shape climo_yearly = ', np.shape(climo_yearly))
    latsa_halfdegree = cPickle.load(inf)
    lonsa_halfdegree = cPickle.load(inf)

# ---- loop over dates and update bias estimates

# Fill analyzed_yearly / forecast_yearly, one time slice per date.  Dates
# with no forecast file are masked in forecast_yearly.
for idate, datea in enumerate(date_list_anal):
    
    datef = date_list_fcst[idate]
    #print ('------ processing analysis, forecast dates = ', datea, datef)
    if int(datea) >= 2019010100: tally_statistics = True

    # ---- read the ECMWF ERA5 reanalysis at valid at the forecast date.
    
    infile = datadir_reanl + 't2m_era5_halfdegree_'+datef+'.cPick'
    #print (infile)
    with open(infile, 'rb') as inf:  # closed even if unpickling fails
        analysis = cPickle.load(inf)
        if idate == 0:
            # grid information is only unpickled from the first file; it
            # sizes the (date, lat, lon) work arrays.
            lats = cPickle.load(inf)
            lons = cPickle.load(inf)
            nlats, nlons = np.shape(lats)
            print (nlats, nlons)
            analyzed_yearly = ma.zeros((ndates,nlats,nlons), dtype=np.float32) 
            forecast_yearly = ma.zeros((ndates,nlats,nlons), dtype=np.float32)    
    analyzed_yearly[idate,:,:] = analysis[:,:] 
    
    # ---- read the control forecast at this lead time and initial date

    infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'  
    fexist = path.exists(infile)
    print (infile, fexist)
    if fexist:
        #print (infile)
        grbfile = pygrib.open(infile) 
        try:
            grb = grbfile.select()[0]   # first message in the file
            forecast = grb.values
        finally:
            grbfile.close()  # release the GRIB handle even if the read fails
        forecast_yearly[idate,:,:] = forecast[:,:] 
    else:
        # no forecast for this date: flag the whole time slice as missing
        forecast_yearly[idate,:,:] = ma.masked
    
# ---- for each grid point, first produce a linear regression in order 
#      to get a sense of the standard error of the regression coefficients.
    
# ---- deviations from climatology; the climatology time index is offset by
#      the lead time in whole days (ndstart).

forecast_deviation = forecast_yearly - climo_yearly[ndstart:ndstart+ndates,:,:]
analyzed_deviation = analyzed_yearly - climo_yearly[ndstart:ndstart+ndates,:,:]

var_const = np.zeros((nlats,nlons), dtype = np.float32)
var_slope = np.zeros((nlats,nlons), dtype = np.float32)
cov_const_slope = np.zeros((nlats,nlons), dtype = np.float32)
regr_const = np.zeros((nlats,nlons), dtype = np.float32)
regr_slope = np.zeros((nlats,nlons), dtype = np.float32)

# ---- ordinary least squares of analyzed on forecast deviation at each grid
#      point; keep the coefficients and their estimated covariance matrix.

for ix in range(nlons):
    for jy in range(nlats):
        a = analyzed_deviation[:,jy,ix]
        f = forecast_deviation[:,jy,ix]

        # Dates with a missing forecast are masked.  The mask is not
        # honored once the data are converted to plain arrays for the
        # fit, so drop masked samples explicitly; otherwise the filler
        # values underneath the mask would enter the regression.
        valid = ~(ma.getmaskarray(a) | ma.getmaskarray(f))
        a_valid = ma.getdata(a)[valid]
        f_valid = ma.getdata(f)[valid]

        # design matrix: column 0 is the constant, column 1 the predictor
        x = np.ones((len(f_valid),2), dtype=np.float32)
        x[:,1] = f_valid
        model = sm.OLS(a_valid, x, hasconst=True)
        results = model.fit()
        params = results.params
        cov = results.cov_params()
        var_const[jy,ix] = cov[0,0]
        var_slope[jy,ix] = cov[1,1]
        cov_const_slope[jy,ix] = cov[1,0]
        regr_const[jy,ix] = params[0]
        regr_slope[jy,ix] = params[1]


# ---- summary diagnostics of the fitted coefficients

print ('mean slope, intercept = ', np.mean(regr_slope), np.mean(regr_const))
print ('std slope, intercept = ', np.std(regr_slope), np.std(regr_const))
ind = np.unravel_index(np.argmin(regr_const, axis=None), regr_const.shape)
print ('ind = ', ind)
print ('regr_const at min = ', regr_const[ind])

# ---- save the coefficient (co)variances.  Three objects are pickled, in
#      this order: var_const, var_slope, cov_const_slope.

outfile = 'covariance_regression_coefficients_lead='+clead+'.cPick'
with open(outfile, 'wb') as ouf:  # closed even if a dump fails
    cPickle.dump(var_const, ouf)
    cPickle.dump(var_slope, ouf)
    cPickle.dump(cov_const_slope, ouf)

print ('mean const, slope = ', np.mean(regr_const), np.mean(regr_slope))
print ('variance of const, slope = ', np.mean(var_const), np.mean(var_slope))
        
#KF_betahat, bias_seasonalKF = seasonalKFbias(cosfac, sinfac, \
#    cos2fac, sin2fac, Bbeta, B, R, KF_betahat, obs, \
#    forecast, bias_seasonalKF)
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    ._calculate_brier_reliability.py                                                                    000664  000765  000024  00000000416 14075104460 017264  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    b5    238:848929                                                                                                                                                                                                                                                  calculate_brier_reliability.py                                                                      000664  000765  000024  00000026204 14075104460 017052  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ calculate_brier_reliability.py cyyyymm clead ctype
"""

import os, sys
import numpy as np
import numpy.ma as ma
from netCDF4 import Dataset
import _pickle as cPickle

# ============================================================

def compute_brier_score_and_contingency_tables( idate, ithresh, \
    brier_score_overall, brier_score_overall_west, \
    brier_score_overall_east, brier_score_daily, \
    brier_score_gridded, probability, \
    contab, contab_daily, precip_anal, thresh, ones, zeros, weight):
    """
    Add this case's contribution to the running Brier-score sums and to the
    contingency tables used for reliability / frequency of usage.

    Parameters
    ----------
    idate, ithresh : indices of the date and threshold being processed.
    brier_score_overall, brier_score_overall_west, brier_score_overall_east :
        arrays indexed [ithresh]; weighted squared error is accumulated.
    brier_score_daily : array indexed [ithresh, idate].
    brier_score_gridded : array indexed [ithresh, y, x]; a plain 2-D
        [y, x] array is also accepted and accumulated into directly.
    probability : 2-D [y, x] forecast probability of exceeding thresh.
    contab : array indexed [ithresh, icat, 0 or 1]; the number of
        probability categories is taken from its second dimension.
    contab_daily : array indexed [idate, ithresh, icat, 0 or 1].
    precip_anal : 2-D [y, x] analysis; negative values are flagged -1.
    thresh : event threshold applied to precip_anal.
    ones, zeros : 2-D [y, x] arrays of 1s and 0s used to build the
        binary event field.
    weight : 2-D [y, x] weights applied to every tally.

    Returns
    -------
    The seven accumulator arrays, in the order brier_score_overall,
    brier_score_overall_west, brier_score_overall_east, brier_score_daily,
    brier_score_gridded, contab, contab_daily.  All are updated in place.
    """

    ny, nx = np.shape(precip_anal)

    # ---- binary event field: 1 = event, 0 = no event, -1 = negative analysis

    binary_anal = np.where(precip_anal > thresh, ones, zeros)
    binary_anal[np.where(precip_anal < 0)] = -1

    # ---- weighted squared error, shared by all the Brier-score tallies.

    squared_error = weight*(binary_anal-probability)**2
    ixsplit = 3*nx//7   # first column of the eastern subdomain

    brier_score_overall[ithresh] = brier_score_overall[ithresh] + \
        np.sum(squared_error)
    brier_score_overall_west[ithresh] = brier_score_overall_west[ithresh] + \
        np.sum(squared_error[:,0:ixsplit])
    # NOTE(review): the slice ends at -1, so the last column is in neither
    # the west nor the east tally -- confirm this is intended.
    brier_score_overall_east[ithresh] = brier_score_overall_east[ithresh] + \
        np.sum(squared_error[:,ixsplit:-1])

    # ---- gridded score: accumulate into this threshold's slice only.
    #      (Indexing a 3-D array with [:,:] would broadcast the increment
    #      into every threshold.)

    if np.ndim(brier_score_gridded) == 3:
        brier_score_gridded[ithresh,:,:] = brier_score_gridded[ithresh,:,:] + \
            squared_error
    else:
        brier_score_gridded[:,:] = brier_score_gridded[:,:] + squared_error

    brier_score_daily[ithresh,idate] = \
        brier_score_daily[ithresh,idate] + np.sum(squared_error)
    
    # ---- compute increment to contingency table array.  Categories are
    #      centered on icat/(ncats-1) with half-width 1/(2*(ncats-1)),
    #      e.g. 32 categories for a 31-member ensemble.

    ncats = np.shape(contab)[1]
    nmembers = float(ncats - 1)
    halfwidth = 1./(2.*nmembers)
    
    for icat in range(ncats):
        
        center = float(icat)/nmembers
        pmin = max(0.0, center - halfwidth)
        in_category = probability >= pmin
        if icat < ncats-1:
            in_category = np.logical_and(in_category, \
                probability < center + halfwidth)
        else:
            # the top category includes probability == 1.0; a strict
            # "< 1.0" test would leave 100% forecasts out of every category.
            in_category = np.logical_and(in_category, probability <= 1.0)

        # second table index: 1 = event observed, 0 = event not observed
        for iobserved in (1, 0):
            a = np.where(np.logical_and(in_category, \
                binary_anal == iobserved))
            weight_sum = np.sum(weight[a])   # 0.0 when nothing is selected
            contab[ithresh,icat,iobserved] = \
                contab[ithresh,icat,iobserved] + weight_sum
            contab_daily[idate,ithresh,icat,iobserved] = \
                contab_daily[idate,ithresh,icat,iobserved] + weight_sum
    
    return brier_score_overall, brier_score_overall_west, \
        brier_score_overall_east,  brier_score_daily, \
        brier_score_gridded, contab, contab_daily

# ============================================================

# ---- get the month and end time from the commmand line.  The first 00
#      hour analysis of the month will need to access the data from
#      the previous month.

cyyyymm = sys.argv[1] # 202001 etc
clead = sys.argv[2] # 018 etc
ctype = sys.argv[3] # thinned or upscaled
cmo = cyyyymm[4:6]
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
#cmonths = ['Mar']
imonth = int(cmo)-1
cmonth = cmonths[imonth]
cyyyy = cyyyymm[0:4]
thresholds = [0.254, 1.0, 5.0, 10.0, 25.0]
nthresh = len(thresholds)

# ---- read the probability forecasts for this lead time and month
#      saved by the quantile-mapping routine software.

forecast_directory = '/Volumes/NBM/conus_gefsv12/'+ctype+'/'
infile = forecast_directory + cmonth + cyyyy + \
     '_use99_lead'+clead+'_probabilities_'+ctype+'.nc'
print ('reading ', infile)
nc = Dataset(infile)
try:
    # NOTE(review): the [0:-1] slices leave out the last date stored in
    # the file -- confirm that this is intended.
    yyyymmddhh_init_in = nc.variables['yyyymmddhh_init'][0:-1]
    print (yyyymmddhh_init_in)
    yyyymmddhh_fcst_in = nc.variables['yyyymmddhh_fcst'][0:-1]
    lats_fcst = nc.variables['lats'][:,:]
    lons_fcst = nc.variables['lons'][:,:]
    probability_raw = nc.variables['probability_raw'][0:-1,:,:,:]
    probability_qmapped = nc.variables['probability_qmapped'][0:-1,:,:,:]
    print ('np.shape(probability_raw) = ', np.shape(probability_raw))
    # nthresh is reset here from the dimensions of the file contents
    ndates, nthresh, ny_fcst, nx_fcst = np.shape(probability_qmapped )
    lons_in = nc.variables['lons'][:,:]
    lats_in = nc.variables['lats'][:,:]
finally:
    nc.close()  # release the netCDF handle even if a read fails

# --- set up various working arrays needed

ones = np.ones((ny_fcst, nx_fcst), dtype = np.int32)
zeros = np.zeros((ny_fcst, nx_fcst), dtype = np.int32)

# ---- need to develop and read in the climatological probability 
#      by month

climo_directory = '/Volumes/NBM/conus_panal/'
infile = climo_directory + cmo + \
    '_ccpa_on_ndfd_grid_6hourly_climo_probability_'+ctype+'.cPick'
print ('reading from ', infile) 
with open(infile, 'rb') as inf:  # closed even if unpickling fails
    probability_climo = cPickle.load(inf)
    lons_climo = cPickle.load(inf)
    lats_climo = cPickle.load(inf)

# --- set up work arrays.

# ---- zero-filled float64 accumulators.  Every name gets its own array;
#      the three entries of each group are for the raw forecast, the
#      quantile-mapped forecast, and climatology.

# contingency tables, [threshold, probability category, 0 or 1]
contab_forecast_raw, contab_forecast_qmapped, contab_climo = (
    np.zeros((nthresh,32,2), dtype=np.float64) for _ in range(3))

# the same tables kept separately for each date
contab_forecast_raw_daily, contab_forecast_qmapped_daily, \
    contab_climo_daily = (
    np.zeros((ndates,nthresh,32,2), dtype=np.float64) for _ in range(3))

# Brier-score sums per threshold: whole domain, then west and east parts
brier_score_raw_overall, brier_score_qmapped_overall, \
    brier_score_climo_overall = (
    np.zeros((nthresh), dtype=np.float64) for _ in range(3))

brier_score_raw_west, brier_score_qmapped_west, brier_score_climo_west = (
    np.zeros((nthresh), dtype=np.float64) for _ in range(3))

brier_score_raw_east, brier_score_qmapped_east, brier_score_climo_east = (
    np.zeros((nthresh), dtype=np.float64) for _ in range(3))

# Brier-score sums per threshold and date
brier_score_raw_daily, brier_score_qmapped_daily, brier_score_climo_daily = (
    np.zeros((nthresh, ndates), dtype=np.float64) for _ in range(3))

# Brier-score sums per threshold and grid point
brier_score_raw_gridded, brier_score_qmapped_gridded, \
    brier_score_climo_gridded = (
    np.zeros((nthresh, ny_fcst, nx_fcst), dtype=np.float64)
    for _ in range(3))

# For every forecast date: read the matching precipitation analysis, then
# for each threshold tally Brier scores and contingency tables for the raw
# forecast, the quantile-mapped forecast, and climatology.
for idate, fcst_date in enumerate(yyyymmddhh_fcst_in):
    
    # ---- read the precipitation analyses for the chosen date

    cyyyymm_anal = str(yyyymmddhh_fcst_in[idate])[0:6]
    master_directory = '/Volumes/NBM/conus_panal/'
    infile = master_directory + cyyyymm_anal + \
        '_ccpa_on_ndfd_grid_6hourly_'+ctype+'.nc'
    nc = Dataset(infile)
    try:
        yyyymmddhh_end_in = nc.variables['yyyymmddhh_end'][:]
        if idate == 0:
            # the mask and the cos(latitude) weights are built once, from
            # the first analysis file; weight is zero where conusmask != 1.
            conusmask_in = nc.variables['conusmask'][:,:]
            lons_in = nc.variables['lons'][:,:]
            lats_in = nc.variables['lats'][:,:]
            weight = np.where(conusmask_in == 1.0, \
                ones*np.cos(lats_in*3.1415926/180.), zeros)
        idx = np.where(yyyymmddhh_end_in == fcst_date)[0]
        if len(idx) != 1:
            # exactly one matching analysis time is needed for the 2-D
            # field expected below; say so instead of failing later on.
            raise ValueError('expected exactly one analysis time matching ' + \
                str(fcst_date) + ' in ' + infile + ', found ' + str(len(idx)))
        precip_anal = np.squeeze(nc.variables['apcp_anal'][idx,:,:])
        ny_anal, nx_anal = np.shape(precip_anal)
    finally:
        nc.close()  # release the netCDF handle even if a read fails

    # ---- loop thru thresholds, compute scores for raw, qmapped forecast, 
    #      and climatology
    
    for ithresh, thresh in enumerate(thresholds):
        
        # --- raw
        
        probability = probability_raw[idate,ithresh,:,:]
        brier_score_raw_overall, brier_score_raw_west, \
            brier_score_raw_east, brier_score_raw_daily, \
            brier_score_raw_gridded, contab_forecast_raw, \
            contab_forecast_raw_daily = \
            compute_brier_score_and_contingency_tables( idate, ithresh, \
            brier_score_raw_overall, brier_score_raw_west, \
            brier_score_raw_east, brier_score_raw_daily,\
            brier_score_raw_gridded, probability, \
            contab_forecast_raw, contab_forecast_raw_daily, \
            precip_anal, thresh, ones, zeros, weight)
        
        # --- quantile mapped
        
        probability = probability_qmapped[idate,ithresh,:,:]
        brier_score_qmapped_overall, brier_score_qmapped_west, \
            brier_score_qmapped_east, brier_score_qmapped_daily, \
            brier_score_qmapped_gridded, contab_forecast_qmapped, \
            contab_forecast_qmapped_daily = \
            compute_brier_score_and_contingency_tables( idate, ithresh, \
            brier_score_qmapped_overall, brier_score_qmapped_west, \
            brier_score_qmapped_east, brier_score_qmapped_daily, \
            brier_score_qmapped_gridded, probability, \
            contab_forecast_qmapped, contab_forecast_qmapped_daily, \
            precip_anal, thresh, ones, zeros, weight)
            
        # --- climatology (no date index on the climatological probability)
        
        probability = probability_climo[ithresh,:,:]
        brier_score_climo_overall, brier_score_climo_west, \
            brier_score_climo_east, brier_score_climo_daily,\
            brier_score_climo_gridded, contab_climo, contab_climo_daily = \
            compute_brier_score_and_contingency_tables( idate, ithresh, \
            brier_score_climo_overall, brier_score_climo_west, \
            brier_score_climo_east, brier_score_climo_daily, \
            brier_score_climo_gridded, probability, \
            contab_climo, contab_climo_daily, precip_anal, \
            thresh, ones, zeros, weight)            
            

# --- save data via cPickle file for later computation of skill scores,  
#     reliability diagrams

# ---- print Brier skill scores relative to climatology, 1 - BS/BS_climo,
#      one value per threshold.

print ('--------------- ', cyyyymm,' ',clead,' h, ',ctype,' -----------------')
print ('BSS_raw_overall = ', 1. - brier_score_raw_overall/brier_score_climo_overall)
print ('BSS_qmapped_overall = ', 1.-brier_score_qmapped_overall/brier_score_climo_overall)
print ('BSS_raw_west = ', 1. - brier_score_raw_west/brier_score_climo_west)
print ('BSS_qmapped_west = ', 1. - brier_score_qmapped_west/brier_score_climo_west)
print ('BSS_raw_east = ', 1. - brier_score_raw_east/brier_score_climo_east)
print ('BSS_qmapped_east = ', 1. - brier_score_qmapped_east/brier_score_climo_east)


outfile = forecast_directory + cyyyymm + \
    '_lead'+clead+'_'+ctype+'_Brier_contingency_table.cPick'

# ---- the arrays are pickled one after another; anything reading this file
#      has to load them back in exactly this order.

arrays_to_save = (
    brier_score_raw_overall,
    brier_score_raw_west,
    brier_score_raw_east,
    brier_score_raw_daily,
    brier_score_raw_gridded,
    contab_forecast_raw,
    contab_forecast_raw_daily,

    brier_score_qmapped_overall,
    brier_score_qmapped_west,
    brier_score_qmapped_east,
    brier_score_qmapped_daily,
    brier_score_qmapped_gridded,
    contab_forecast_qmapped,
    contab_forecast_qmapped_daily,

    brier_score_climo_overall,
    brier_score_climo_west,
    brier_score_climo_east,
    brier_score_climo_daily,
    brier_score_climo_gridded,
    contab_climo,
    contab_climo_daily,
)

with open(outfile, 'wb') as ouf:  # closed even if a dump fails
    for saved_array in arrays_to_save:
        cPickle.dump(saved_array, ouf)

                                                                                                                                                                                                                                                                                                                                                                                            ._calculate_evaporation_prate.py                                                                    000775  000765  000024  00000000416 14016263142 017313  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    7    123:132908                                                                                                                                                                                                                                                  calculate_evaporation_prate.py                                                                      000775  000765  000024  00000012511 14016263142 017075  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
calculate_evaporation_prate.py

read in global time series of analyzed precipitation rate,
latent heat flux, and surface temperature.   From LHF and
temperature, calculate the evaporative flux.   Get average
over the globe and write out time series.
""" 


from netCDF4 import Dataset
import numpy as np
from dateutils import daterange
import sys
import os
import os.path
from os import path
import numpy.ma as ma
import _pickle as cPickle

def set_Lwater(temp):
    """Return the latent heat of evaporation of water as a cubic in temp.

    temp is the temperature in degrees C (a scalar or a numpy array); the
    result has the same shape.  The calling code treats the result as J/g.
    """
    # Terms are combined in the same left-to-right order as the written
    # polynomial so that floating-point results are unchanged.
    linear_term = 2.36*temp
    quadratic_term = 0.016*temp**2
    cubic_term = 0.00006*temp**3
    return 2500.8 - linear_term + quadratic_term - cubic_term

# ----- stream names and the list of dates covered by each stream (the last
#       daterange argument, 24, is presumably the increment in hours -- confirm
#       against dateutils); the output time series hold one value per date

cstreams = ['1999','2003','2007','2011','2015']
date_list_1999 = daterange('2000010100','2003123100',24)
date_list_2003 = daterange('2004010100','2007123100',24)
date_list_2007 = daterange('2008010100','2011123100',24)
date_list_2011 = daterange('2012010100','2015123100',24)
date_list_2015 = daterange('2016010100','2019123100',24)
ndates_total = sum(len(dates) for dates in (date_list_1999, date_list_2003, \
    date_list_2007, date_list_2011, date_list_2015))
prate_timeseries = np.zeros(ndates_total, dtype=np.float64)
evap_timeseries  = np.zeros(ndates_total, dtype=np.float64)
print (ndates_total)

# --- read the lat/lon vectors from one sample file, then build a 2-D grid of
#     cos(latitude) weights (constant along each latitude row)

infile = '/Users/Tom/python/gefsv12/1999/bfg_2003123100_fhr00_control2.nc4'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()  # lon and lat were already sliced out of the file above
nlons = len(lon)
nlats = len(lat)
cosfac_field = np.zeros((nlats,nlons), dtype=np.float64)
for i in range(nlats):
    cosfac_field[i,:] = np.cos(lat[i]*3.1415926/180.)

# --- loop over streams
#
# For every date of every stream, read the latent heat flux, precipitation
# rate and 2-m temperature, convert the latent heat flux to an evaporation
# rate, and store the cos(latitude)-weighted global means in
# evap_timeseries and prate_timeseries.  Dates whose file is absent are
# stored as -99.99.

# Dates covered by each stream.  A stream name with no entry falls back to
# the 2015 list, exactly as the final else-branch of an if/elif chain would.
stream_date_lists = {
    '1999': date_list_1999,
    '2003': date_list_2003,
    '2007': date_list_2007,
    '2011': date_list_2011,
}

# Denominator of every weighted mean; it does not depend on the date.
cosfac_sum = np.sum(cosfac_field)

ktr = 0  # running index into the time series, continuing across streams
for istream, cstream in enumerate(cstreams):

    print (istream, cstream )
    date_list = stream_date_lists.get(cstream, date_list_2015)
    print ('***** processing stream = ', cstream)

    # ---- loop thru each date in the date_list

    for idate, date in enumerate(date_list):

        infile1 = '/Users/Tom/python/gefsv12/'+cstream+'/bfg_'+date+'_fhr00_control2.nc4'
        if path.exists(infile1):

            # --- read in from netCDF file; the file is closed even when one
            #     of the variables cannot be read.

            nc = Dataset(infile1)
            try:
                latent = nc.variables['lhtfl_avesfc'][0,:,:]
                prate = nc.variables['prate_avesfc'][0,:,:]
                temperature = nc.variables['tmp2m'][0,:,:] - 273.15 # convert to deg C
            finally:
                nc.close()

            # ---- from the 2-meter temperature, estimate the latent heat of evaporation
            #      Lwater calculated from https://en.wikipedia.org/wiki/Latent_heat
            #      as a cubic function of temperature in degrees C.

            Lwater = set_Lwater(temperature)

            # Evaporation rate can be calculated from the latent heat flux divided by
            # the latent heat of evaporation of water (Lwater).
            # E = (latent heat flux)/Lwater , where latent heat flux
            # read in from netCDF file.
            #
            # Do the units work out?
            #
            # Numerator's units: evaporative heat flux units: W/m**2
            # 1 W = 1 J/s = 1 Nm/s = 1 kg*m**2/s**3 , so evaporative heat flux
            # units are (kg m**2/s**3)*(1/m**2) = kg/s**3
            #
            # Denominator's units: Lwater as calculated there has units of
            # J/gm = (kg*m**2/s**2)/ gm.  Multiply by 1000 gm/kg to get J/kg,
            # i.e., expressed in m**2/s**2 .
            #
            # So, the final units are (kg/s**3) / (m**2/s**2) =
            #   kg/s**3 * s**2/m**2 = kg/(m**2 s)
            #
            # which coincides with the precipitation rate units in the grib table
            # https://www.nco.ncep.noaa.gov/pmb/docs/on388/table2.html
            #

            evap = latent / (Lwater*1000.)
            evap_timeseries[ktr] = np.sum(evap*cosfac_field) / cosfac_sum
            # NOTE(review): the factor of 2.0 is applied to every stream; an
            # earlier variant applied it to the first stream only.  The reason
            # for the factor is not visible here -- confirm before relying on it.
            prate_timeseries[ktr] = 2.0 * np.sum(prate*cosfac_field) / cosfac_sum
            print (idate, ktr, date, evap_timeseries[ktr], prate_timeseries[ktr], \
                np.sum(Lwater*cosfac_field) / cosfac_sum)

            # bad data point filter: an implausibly large global mean is
            # replaced by the previously stored value.  The series is indexed
            # by ktr (idate restarts at 0 for each stream), so the previous
            # entry is ktr-1; the very first entry has no predecessor and is
            # flagged as missing instead.
            if evap_timeseries[ktr] > 0.1:
                evap_timeseries[ktr] = evap_timeseries[ktr-1] if ktr > 0 else -99.99
            if prate_timeseries[ktr] > 0.1:
                prate_timeseries[ktr] = prate_timeseries[ktr-1] if ktr > 0 else -99.99

        else:

            # no file for this date: flag both series as missing
            evap_timeseries[ktr] = -99.99
            prate_timeseries[ktr] = -99.99

        ktr = ktr+1
        
# ---- save to file.
#      Two pickle records are written back to back into one file: first the
#      evaporation series, then the precipitation-rate series.  A reader must
#      call load() twice, in that same order.  The file handle is closed by
#      the statement that follows the second dump.

outfile = 'evap_prate.cPick'
print ('writing to ', outfile)
ouf = open(outfile,'wb')
cPickle.dump(evap_timeseries, ouf)
cPickle.dump(prate_timeseries, ouf)
ouf.close()                                                                                                                                                                                       ._cca.py                                                                                            000775  000765  000024  00000000416 14016263143 012303  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    F    107:201830                                                                                                                                                                                                                                                  cca.py                                                                                              000775  000765  000024  00000030431 14016263143 012066  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
cca.py: for the chosen month and forecast lead time (currently hard-coded in
the loops below; the command-line inputs are commented out), the program
loads in the HUC precipitation and the ERA-5 analysis data, and it munges
the data to make it suitable for Canonical Correlation Analysis.
It calls scikit-learn's PLSRegression (partial least squares regression)
routine to perform the analysis and saves output.
"""

from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dateshift
from datetime import datetime
import sys
import os
from os import path
import numpy.ma as ma
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap
import scipy.stats as stats
from sklearn.cross_decomposition import PLSRegression

# plot defaults: small tick labels, very small legend text, fancy legend box
rcParams.update({
    'xtick.labelsize': 'x-small',
    'ytick.labelsize': 'x-small',
    'legend.fontsize': 'xx-small',
    'legend.fancybox': True,
})


#def load_latlon():
    
#    infile = 'cca/199901_air_era5.nc'
#    nc = Dataset(infile)
#    lat = nc.variables['lat'][:] 
#    lon = nc.variables['lon'][:]
#    nc.close()
    
def load_era5(cyear, cmonth, cvar):
    """Load one month of pickled ERA-5 data for a single variable.

    The file 'cca/<cyear><cmonth>_<cvar>_era5.cPick' is read as four
    consecutive pickle records: the 4-D data array, the longitudes, the
    latitudes, and the dates.

    Args:
        cyear: year as a string, used verbatim in the file name.
        cmonth: month as a string, used verbatim in the file name.
        cvar: variable name as a string, used verbatim in the file name.

    Returns:
        The tuple (yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat),
        where ntimes, nlevels, ny, nx are the four dimensions of input_data.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the data array does not have exactly four dimensions.
    """
    infile = 'cca/'+cyear+cmonth+'_'+cvar+'_era5.cPick'
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    # The with-block closes the file even if one of the loads raises.
    with open(infile,'rb') as inf:
        input_data = cPickle.load(inf)
        lon = cPickle.load(inf)
        lat = cPickle.load(inf)
        yyyymmddhh = cPickle.load(inf)
    ntimes, nlevels, ny, nx = np.shape(input_data)
    return yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat
    
def convert_to_yyyymmddhh(precipDates):
    """Convert the HUC (year, month, day) date array to yyyymmddhh integers.

    precipDates is a 2-D array whose first three columns are year, month
    and day.  The hour is always set to 12, on the assumption that the
    7am local time of the HUC data is close enough to 12Z.  Returns a
    list with one integer per row.
    """
    nrows, _ = np.shape(precipDates)
    stamps = []
    for irow in range(nrows):
        year = precipDates[irow,0]
        month = precipDates[irow,1]
        day = precipDates[irow,2]
        # single-digit months and days get a leading zero
        month_str = '0'+str(month) if month < 10 else str(month)
        day_str = '0'+str(day) if day < 10 else str(day)
        stamps.append(int(str(year)+month_str+day_str+'12'))
    return stamps
        
# --- get inputs from command line

#cmonth = sys.argv[1] # 01 to 12
#clead = sys.argv[2] # forecast lead time in days, e.g., 2.5   Use half days so the 00 UTC
# Driver: for each (month, lead) pair, build a predictor matrix X from
# standardized ERA-5 fields and a predictand matrix Y from standardized,
# power-transformed HUC precipitation, fit a PLSRegression, and pickle
# the results.
cyears = ['1981', '1982', '1983', '1984','1985', '1986', '1987', '1988', '1989', '1990', \
    '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', \
    '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011']
nyears = len(cyears)
   
#for cmonth in ['01','02','03','04','05','06','07','08','09','10','11']:
for cmonth in ['03']:
    for clead in ['1.5']:

        # ---- ECMWF initial dates line up with the 12 UTC HUC dates.

        ifhour = int(float(clead)*24)
        print ('forecast lead in hours: ', ifhour)
        imonth = int(cmonth)
        # total number of days of this calendar month summed over all years;
        # used to size the storage arrays below
        if imonth == 1 or imonth == 3 or imonth == 5 or imonth == 7 \
        or imonth == 8 or imonth == 10 or imonth == 12:
            ndays_total = 31*nyears
        elif imonth == 4 or imonth == 6 or imonth == 9 or imonth == 11:
            ndays_total = 30*nyears 
        elif imonth == 2:
            # 7 leap years fall within 1981-2011 (1984, 1988, ..., 2008); this
            # count is tied to the cyears list above
            ndays_total = 7*29 + (nyears-7)*28

        # --- read in the HUC data provided by Matt Switanek
        #     NOTE(review): this read does not depend on cmonth or clead, so
        #     it is repeated unchanged for every pass of the two loops.
        #     The file holds six consecutive pickle records, loaded in order.

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Reading HUC data. Current time is ', current_time)
        f1 = open("PRISM_pr_hucs_19810101-20180930.pickle",'rb')
        precipHUCs = cPickle.load(f1) #the daily precip accumulations in mm (days,hucs)
        ndayhucs, nhucs = np.shape(precipHUCs)
        precipDates = cPickle.load(f1) #the dates
        hucLats = cPickle.load(f1) #centroid lats of hucs
        hucLons = cPickle.load(f1) #centroid lons of hucs
        hucShapes = cPickle.load(f1) #embedded lists of huc boundaries
        hucDivision4 = cPickle.load(f1) #the division 4 numeric codes of the hucs
        #print ('np.shape(precipHUCs) = ', np.shape(precipHUCs))
        #print ('precipDates[0:-1:100] = ', precipDates[0:-1:100]) # [2012    3   19]
        #print ('hucLons = ', hucLons) # negative for west
        #print ('hucLats = ', hucLats)
        #print ('hucShapes.__doc__ = ',hucShapes.__doc__ )
        #print ('hucDivision4 = ', hucDivision4)
        f1.close()
        #for i in range(len(hucLons)):
        #    print (i,hucLons[i],hucLats[i])
        #sys.exit()
        

        # ---- convert the precipitation dates into yyyymmddhh format

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Converting HUC precipitation dates.  Current time is ', current_time)
        yyyymmddhh_hucs = convert_to_yyyymmddhh(precipDates)
        #print ('HUC dates = ', yyyymmddhh_hucs[0:100])
        #sys.exit()

        # ---- for the chosen month, load the ERA5 analysis data over the multiple 
        #      years
        #      NOTE(review): the level/lat/lon sizes (4, 45, 180) are
        #      hard-coded here and must match the arrays in the ERA5 files.

        temp_store = np.zeros((ndays_total,4,45,180), dtype=np.float64) # 4 levels, 45 lats, 180 lons
        shum_store = np.zeros((ndays_total,4,45,180), dtype=np.float64)
        uwnd_store = np.zeros((ndays_total,4,45,180), dtype=np.float64)
        vwnd_store = np.zeros((ndays_total,4,45,180), dtype=np.float64)
        yyyymmddhh_store = np.zeros((ndays_total), dtype=np.int32)

        # ktr counts the time samples stored so far; after the loop it is the
        # total number of samples actually loaded
        ktr = 0
        for iyear, cyear in enumerate(cyears):
    
            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            #print ("Loading ERA-5 reanalysis data from disk for year ",cyear,". Current time = ", current_time)
            yyyymmddhh, temp, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'air')
            yyyymmddhh, shum, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'shum')    
            yyyymmddhh, uwnd, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'uwnd')    
            yyyymmddhh, vwnd, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'vwnd') 
            # negative specific humidity is set to zero before the power
            # transform (exponent 0.3333) applied below
            zero_store = np.zeros((ntimes, nlevels, ny, nx), dtype=np.float32)
            shum = np.where(shum < 0.0, zero_store, shum)
            #print (yyyymmddhh) 
            #print ('ntimes = ', ntimes)
            # only the first 45 latitude rows of each field are kept
            temp_store[ktr:ktr+ntimes,:,:,:] = temp[:,:,0:45,:]
            shum_store[ktr:ktr+ntimes,:,:,:] = shum[:,:,0:45,:]**0.3333
            uwnd_store[ktr:ktr+ntimes,:,:,:] = uwnd[:,:,0:45,:]
            vwnd_store[ktr:ktr+ntimes,:,:,:] = vwnd[:,:,0:45,:]
            yyyymmddhh_store[ktr:ktr+ntimes] = yyyymmddhh[:]
            ktr = ktr + ntimes
    
        # --- determine the standard deviation across all time samples, and normalize by this.
        #     NOTE(review): the statistics are taken over all ndays_total rows;
        #     if ktr < ndays_total the unfilled zero rows are included too.

        #print ('yyyymmddhh_store from ERA5 = ',yyyymmddhh_store[0:100])
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Determining standard deviation and dividing by this.  Current time =  ', current_time)
        temp_stddev = np.std(temp_store, axis=0)
        shum_stddev = np.std(shum_store, axis=0)
        uwnd_stddev = np.std(uwnd_store, axis=0)
        vwnd_stddev = np.std(vwnd_store, axis=0)
        temp_mean = np.mean(temp_store, axis=0)
        shum_mean = np.mean(shum_store, axis=0)
        uwnd_mean = np.mean(uwnd_store, axis=0)
        vwnd_mean = np.mean(vwnd_store, axis=0)
        print ('max, min temp_mean = ', np.max(temp_mean), np.min(temp_mean))
        print ('max, min temp_stddev = ', np.max(temp_stddev), np.min(temp_stddev))

        # standardize each time sample: subtract the mean, divide by the
        # standard deviation.  NOTE(review): a grid point with zero standard
        # deviation would produce inf/nan here.
        for idate in range(ktr):
            temp_store[idate,:,:,:] = (temp_store[idate,:,:,:] - temp_mean[:,:,:]) / temp_stddev[:,:,:]
            shum_store[idate,:,:,:] = (shum_store[idate,:,:,:] - shum_mean[:,:,:]) / shum_stddev[:,:,:]
            uwnd_store[idate,:,:,:] = (uwnd_store[idate,:,:,:] - uwnd_mean[:,:,:]) / uwnd_stddev[:,:,:]
            vwnd_store[idate,:,:,:] = (vwnd_store[idate,:,:,:] - vwnd_mean[:,:,:]) / vwnd_stddev[:,:,:]
        print ('max, min temp_store after normalization = ', np.max(temp_store), np.min(temp_store))
        print ('max, min shum_store after normalization = ', np.max(shum_store), np.min(shum_store))
        print ('max, min uwnd_store after normalization = ', np.max(uwnd_store), np.min(uwnd_store))
        print ('max, min vwnd_store after normalization = ', np.max(vwnd_store), np.min(vwnd_store))
    
        # --- rearrange data; first index is time, second index is 1-d composite vector
        #     of the standardized temperature, specific humidity, and u- and v-wind 
        #     components

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Munging data into X, Y arrays. Current time = ', current_time)
        # number of values in one field: nx longitudes * 45 latitudes * 4 levels
        # (nx comes from the last load_era5 call above)
        ngrid = nx*45*4
        X = np.zeros((ktr, ngrid*4), dtype=np.float64) # 4 as there are four fields
        Y = np.zeros((ktr, nhucs), dtype=np.float64)
        #Y = np.zeros((ktr, 1), dtype=np.float64)
        print ('ktr = ', ktr)
        for idate in range(ktr):
    
            if idate%30 == 0:
                now = datetime.now()
                current_time = now.strftime("%H:%M:%S")
                #print ('Processing idate = ',idate,' of ',ktr,'.  Current time = ', current_time)
            temp_1d = np.reshape(temp_store[idate,:,:,:], ngrid)
            shum_1d = np.reshape(shum_store[idate,:,:,:], ngrid)
            uwnd_1d = np.reshape(uwnd_store[idate,:,:,:], ngrid)
            vwnd_1d = np.reshape(vwnd_store[idate,:,:,:], ngrid)
            #print ('idate, max, min temp_1d = ',idate,np.max(temp_1d), np.min(temp_1d))
            #print ('idate, max, min shum_1d = ',idate,np.max(shum_1d), np.min(shum_1d))
            #print ('idate, max, min uwnd_1d = ',idate,np.max(uwnd_1d), np.min(uwnd_1d))
            #print ('idate, max, min vwnd_1d = ',idate,np.max(vwnd_1d), np.min(vwnd_1d))

            # the four flattened fields are laid side by side in row idate
            X[idate,0:ngrid] = temp_1d[:]
            X[idate,ngrid:2*ngrid] = shum_1d[:]
            X[idate,2*ngrid:3*ngrid] = uwnd_1d[:]
            X[idate,3*ngrid:4*ngrid] = vwnd_1d[:]
                
            # --- for an n.5 -day forecast, pluck the HUC data offset by +n.5 days.
            #     got to get this data individually for the given month of each year.
            #     yyyymmddhh_store has the vector of initial condition dates.
            #     yyyymmddhh_hucs has the vector of HUC verification period (end)
    
            #print ('str(yyyymmddhh_store[idate] = ', str(yyyymmddhh_store[idate]))
            fcst_date = int(dateshift(str(yyyymmddhh_store[idate]), ifhour))
            if idate%30 == 0:
                now = datetime.now()
                current_time = now.strftime("%H:%M:%S")
                #print ('Finding HUC data for date = ', fcst_date,'. Current time is ', current_time)
            # list.index raises ValueError if fcst_date is not among the HUC dates
            timeindex = yyyymmddhh_hucs.index(fcst_date)
            Y[idate,:] = precipHUCs[timeindex,:]**0.3333
  
        # --- apply standard deviation to Y, HUC data  
    
        print ('max, min X = ', np.max(X), np.min(X))
        print ('max, min Y after power transform ', np.max(Y), np.min(Y))
        Ystd = np.std(Y,axis=0)
        Ymean = np.mean(Y,axis=0)
        for idate in range(ktr):
            Y[idate,:] = (Y[idate,:]-Ymean[:]) / Ystd[:]
        print ('max, min Y after normalization', np.max(Y), np.min(Y))

        # --- perform the canonical correlation analysis
        #     (the routine actually used is scikit-learn's PLSRegression with
        #     160 components; X and Y were already standardized, hence
        #     scale=False)

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ("Performing CCA.  Current time = ", current_time)
        plsr = PLSRegression(n_components=160, scale=False)
        plsr.fit(X,Y)
        Ypred = plsr.predict(X)
        print ('max, min Y = ', np.max(Y), np.min(Y))
        print ('max, min Ypred = ', np.max(Ypred), np.min(Ypred))
        # diagnostic: predicted vs. actual for the single HUC in column 163
        for i in range(ktr):
            print (i,Ypred[i,163], Y[i,163])
        X_c, Y_c = plsr.transform(X, Y)
        print ('shape X_c, Y_c = ', np.shape(X_c), np.shape(Y_c))
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ("Done!  Current time = ", current_time)

        # ---- save to file
        #      six consecutive pickle records; a reader must load them in
        #      this same order

        outfile = 'cca/cca_data_month='+cmonth+'_lead='+clead+'days.cPick'
        print ('writing to ', outfile)
        ouf = open(outfile, 'wb')
        cPickle.dump(Ystd, ouf)
        cPickle.dump(Ymean, ouf)
        cPickle.dump(X_c, ouf)
        cPickle.dump(Y_c, ouf)
        cPickle.dump(Ypred, ouf)
        cPickle.dump(yyyymmddhh_store, ouf)
        ouf.close()




                                                                                                                                                                                                                                             ._cca_plot.py                                                                                       000775  000765  000024  00000000415 14016263143 013340  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    {G    234:66978                                                                                                                                                                                                                                                   cca_plot.py                                                                                         000775  000765  000024  00000022363 14016263143 013131  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
cca_plot.py: user inputs the date and the forecast lead time. program
then loads in the HUC precipitation and the ERA-5 analysis data,
and it plots out the prediction from previously calculated CCA analysis.

"""

from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dateshift
from datetime import datetime
import sys
import os
from os import path
import numpy.ma as ma
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap
import scipy.stats as stats
from sklearn.cross_decomposition import CCA
import shapefile
from matplotlib.collections import LineCollection, PatchCollection
from matplotlib.patches import Polygon

# plot defaults: small tick labels, very small legend text, fancy legend box
rcParams.update({
    'xtick.labelsize': 'x-small',
    'ytick.labelsize': 'x-small',
    'legend.fontsize': 'xx-small',
    'legend.fancybox': True,
})

def load_era5(cyear, cmonth, cvar):
    """Load one month of pickled ERA-5 data for a single variable.

    The file 'cca/<cyear><cmonth>_<cvar>_era5.cPick' is read as four
    consecutive pickle records: the 4-D data array, the longitudes, the
    latitudes, and the dates.

    Args:
        cyear: year as a string, used verbatim in the file name.
        cmonth: month as a string, used verbatim in the file name.
        cvar: variable name as a string, used verbatim in the file name.

    Returns:
        The tuple (yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat),
        where ntimes, nlevels, ny, nx are the four dimensions of input_data.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the data array does not have exactly four dimensions.
    """
    infile = 'cca/'+cyear+cmonth+'_'+cvar+'_era5.cPick'
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    # The with-block closes the file even if one of the loads raises.
    with open(infile,'rb') as inf:
        input_data = cPickle.load(inf)
        lon = cPickle.load(inf)
        lat = cPickle.load(inf)
        yyyymmddhh = cPickle.load(inf)
    ntimes, nlevels, ny, nx = np.shape(input_data)
    return yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat
    
def convert_to_yyyymmddhh(precipDates):
    """Convert the HUC (year, month, day) date array to yyyymmddhh integers.

    precipDates is a 2-D array whose first three columns are year, month
    and day.  The hour is always set to 12, on the assumption that the
    7am local time of the HUC data is close enough to 12Z.  Returns a
    list with one integer per row.
    """
    def two_chars(value):
        # values below 10 get a leading zero
        if value < 10:
            return '0'+str(value)
        return str(value)

    nrows, _ = np.shape(precipDates)
    return [
        int(str(precipDates[irow,0]) + two_chars(precipDates[irow,1])
            + two_chars(precipDates[irow,2]) + '12')
        for irow in range(nrows)
    ]
        
def find_color_index(levels, Y):
    """Return the index of the color interval that contains Y.

    The result is the first i for which levels[i] <= Y < levels[i+1].
    A value that matches no interval is clamped rather than left
    unassigned: values below levels[0] map to the first interval (0), and
    all other unmatched values (e.g. Y >= levels[-1]) map to the last
    interval (len(levels)-2).

    Args:
        levels: sequence of interval boundaries, at least two values.
        Y: the scalar value to classify.

    Returns:
        An integer index in the range 0 .. len(levels)-2.

    Raises:
        ValueError: fewer than two levels were given, or Y is NaN.
    """
    n = len(levels)
    if n < 2:
        raise ValueError('levels must contain at least two values')
    if Y != Y:  # NaN is the only value not equal to itself
        raise ValueError('cannot assign a color index to a NaN value')
    for i in range(n-1):
        if levels[i] <= Y < levels[i+1]:
            return i  # first matching interval wins
    # No interval matched: clamp to the nearest end interval.
    if Y < levels[0]:
        return 0
    return n-2

# --- get inputs from command line

# Script body: takes the initial-condition date (yyyymmddhh) and the lead time
# in days from the command line, then draws two maps of HUC precipitation for
# the verifying date: the analyzed amounts and the previously saved forecast.
cdate = sys.argv[1]
clead = sys.argv[2]
cmonth = cdate[4:6]
cyear = cdate[0:4]
#cmonth = sys.argv[1] # 01 to 12
#clead = sys.argv[2] # forecast lead time in days, e.g., 2.5   Use half days so the 00 UTC
cyears = ['1981', '1982', '1983', '1984','1985', '1986', '1987', '1988', '1989', '1990', \
    '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', \
    '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011']
nyears = len(cyears)
    
ifhour = int(float(clead)*24)
print ('forecast lead in hours: ', ifhour)
imonth = int(cmonth)

# --- read in the ERA5 data for this month.
#     Only lon/lat are used from this call: they define the grid for an
#     all-zero field that is contoured below to obtain a colorbar.


yyyymmddhh_era5, input_data, ntimes, nlevels, ny, nx, lon, lat = \
    load_era5(cyear, cmonth, 'air')
nlons = len(lon)
nlats = len(lat)
lon2d, lat2d = np.meshgrid(lon,lat)
zeros = np.zeros((nlats, nlons), dtype=np.float32)    

# --- read in the HUC data provided by Matt Switanek
#     (six consecutive pickle records, loaded in order)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Reading HUC data. Current time is ', current_time)
f1 = open("cca/PRISM_pr_hucs_19810101-20180930.pickle",'rb')
precipHUCs = cPickle.load(f1) #the daily precip accumulations in mm (days,hucs)
ndayhucs, nhucs = np.shape(precipHUCs)
precipDates = cPickle.load(f1) #the dates
hucLats = cPickle.load(f1) #centroid lats of hucs
hucLons = cPickle.load(f1) #centroid lons of hucs
hucShapes = cPickle.load(f1) #embedded lists of huc boundaries
hucDivision4 = cPickle.load(f1) #the division 4 numeric codes of the hucs
f1.close()

# ---- read in the previously calculated CCA output
#      The file is read as two pickle records: the predictions, then the
#      initial-condition dates.  Negative predictions are set to zero.

infile = 'cca/PLSR_regression_data_month='+cmonth+'_lead='+clead+'days.cPick'
print ('reading from ', infile)
inf = open(infile, 'rb')
Ypred_full = cPickle.load(inf)
yyyymmddhh_ICdates = cPickle.load(inf)
inf.close()
ys = np.shape(Ypred_full)
yzeros = np.zeros(ys,dtype=np.float32)
Ypred_full = np.where (Ypred_full < 0.0, yzeros, Ypred_full)

# ---- convert the HUC precipitation dates into yyyymmddhh format

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Converting HUC precipitation dates.  Current time is ', current_time)
yyyymmddhh_hucs = convert_to_yyyymmddhh(precipDates)

# ---- for the chosen month, load the ERA5 analysis data and extract the analysis at the initial time
#      NOTE(review): temp_day, shum_day, uwnd_day and vwnd_day are not used
#      again in the rest of this script.

yyyymmddhh, temp, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'air')
yyyymmddhh, shum, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'shum')    
yyyymmddhh, uwnd, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'uwnd')    
yyyymmddhh, vwnd, ntimes, nlevels, ny, nx, lon, lat = load_era5(cyear, cmonth, 'vwnd')
yyyymmddhh_list = yyyymmddhh.tolist()

# list.index raises ValueError if cdate is not among the ERA5 dates
timeindex = yyyymmddhh_list.index(int(cdate))    
temp_day = temp[timeindex,:,:]  
shum_day = shum[timeindex,:,:]  
uwnd_day = uwnd[timeindex,:,:]  
vwnd_day = vwnd[timeindex,:,:]  
    
# --- for an n.5 -day forecast, pluck the HUC data offset by +n.5 days.
#     got to get this data individually for the given month of each year.
#     yyyymmddhh_store has the vector of initial condition dates.
#     yyyymmddhh_hucs has the vector of HUC verification period (end)
#     NOTE(review): if cdate is not among the saved initial-condition dates,
#     timeindex_PLSR is empty and Ypred2 will not hold one value per HUC.
    
fcst_date = int(dateshift(cdate, ifhour))
timeindex_PLSR = np.where(yyyymmddhh_ICdates == int(cdate)) [0]
Ypred2 = np.squeeze(Ypred_full[timeindex_PLSR,:])
timeindex_HUC = yyyymmddhh_hucs.index(fcst_date)
Y = precipHUCs[timeindex_HUC,:]

# --- get the patches and colors for the analyzed and forecast precip
#     levels holds 15 boundaries (14 intervals); colorst holds 17 colors, so
#     only the first 14 colors can be selected through find_color_index.

patches_analyzed = []
patches_forecast = []
colors_analyzed = []
colors_forecast = []
levels = [0.0,0.1,0.3,0.5,1.0, 1.5,2.0,3.0,5.0,10.0,20.0,30.0,40.0,50.0,200.]
colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray'] 
        
# --- now plot the analyzed

fig1 = plt.figure(figsize=(10,6.2))
axloc = [0.02,0.12,0.96,0.77]
ax1 = fig1.add_axes(axloc)
ax1.set_title('HUC4 analyzed precipitation for 24 hours ending '+ \
    str(fcst_date), fontsize=16,color='Black')
# NOTE(review): the name `map` shadows the Python builtin for the rest of
# the script.
map = Basemap(llcrnrlon=-125,llcrnrlat=25,urcrnrlon=-65,urcrnrlat=51.,
    resolution='l', projection='mill')
patches_analyzed = []
colors_analyzed = []
# NOTE(review): 202 is a hard-coded HUC count; presumably it equals nhucs --
# confirm.  Each HUC may consist of several boundary polygons, and every
# polygon of a HUC receives that HUC's color.
for v in range(0,202): 
    #print (v, Y[v], levels)
    idxcolor = find_color_index(levels, Y[v])
    for v2 in range(0,len(hucShapes[v])):   
        a = hucShapes[v][v2]
        xlon = a[:,0]
        ylat = a[:,1]
        # project lon/lat to map coordinates (this rebinds ys, which held
        # the shape of Ypred_full earlier)
        xs, ys = map(xlon, ylat) 
        patches_analyzed.append(Polygon(np.column_stack([xs, ys]), True) ) 
        colors_analyzed.append(colorst[idxcolor])
ax1.add_collection(PatchCollection(patches_analyzed, facecolor=colors_analyzed, \
    edgecolor='Gray', linewidths=0.3, zorder=2)) 
xc, yc = map(lon2d, lat2d)
# contouring the all-zero field yields the contour set cs1 that the colorbar
# below is built from
cs1 = map.contourf(xc, yc, zeros, levels, colors=colorst,extend='neither')   
map.drawcoastlines()
map.drawcountries()
map.drawstates()

cax = fig1.add_axes([0.02,0.07,0.96,0.02])
cbar = plt.colorbar(cs1, orientation='horizontal',\
    cax=cax, extend='both', ticks=levels[0:-1], format='%g') 
cbar.ax.tick_params(labelsize=9)
cbar.set_label('24-h accumulated precipitation amount (mm)')

# ---- set plot file name and save

plot_title = 'analyzed_precipitation_HUCs_'+str(fcst_date)+'.png'
print ('saving plot to ', plot_title)
fig1.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')


# --- now plot the forecast (same layout as the analyzed map above, colored
#     from Ypred2 instead of Y)

fig1 = plt.figure(figsize=(10,6.2))
axloc = [0.02,0.12,0.96,0.77]
ax1 = fig1.add_axes(axloc)
ax1.set_title('HUC4 PLSR forecast precipitation for 24 hours ending '+ str(fcst_date)+\
    ' lead = '+clead+' days', fontsize=16,color='Black')
map = Basemap(llcrnrlon=-125,llcrnrlat=25,urcrnrlon=-65,urcrnrlat=51.,
    resolution='l', projection='mill')
patches_forecast = []
colors_forecast = []
print ('max, min Ypred2 = ', np.max(Ypred2), np.min(Ypred2))
for v in range(0,202): 
    #print (v, Ypred2[v], levels)
    idxcolor = find_color_index(levels, Ypred2[v])
    for v2 in range(0,len(hucShapes[v])):   
        a = hucShapes[v][v2]
        xlon = a[:,0]
        ylat = a[:,1]
        xs, ys = map(xlon, ylat) 
        patches_forecast.append(Polygon(np.column_stack([xs, ys]), True) ) 
        colors_forecast.append(colorst[idxcolor])

ax1.add_collection(PatchCollection(patches_forecast, facecolor=colors_forecast, \
    edgecolor='Gray', linewidths=0.3, zorder=2)) 
xc, yc = map(lon2d, lat2d)
cs1 = map.contourf(xc, yc, zeros, levels, colors=colorst,extend='neither')   
map.drawcoastlines()
map.drawcountries()
map.drawstates()

cax = fig1.add_axes([0.02,0.07,0.96,0.02])
cbar = plt.colorbar(cs1, orientation='horizontal',\
    cax=cax, extend='both', ticks=levels[0:-1], format='%g') 
cbar.ax.tick_params(labelsize=9)
cbar.set_label('24-h accumulated precipitation amount (mm)')

# ---- set plot file name and save

plot_title = 'forecast_precipitation_IC='+cdate+'_lead='+clead+'days_HUCs.png'
print ('saving plot to ', plot_title)
fig1.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')



                                                                                                                                                                                                                                                                                   ._cca_xval.py                                                                                       000775  000765  000024  00000000417 14016263143 013336  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS         
  com.macromates.selectionRange           com.macromates.visibleIndex  `    'H    11-11:30+50                                                                                                                                                                                                                                                 cca_xval.py                                                                                         000775  000765  000024  00000030312 14016263143 013116  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
cca_xval.py: user inputs the month of choice and the forecast lead time. program
then loads in the HUC precipitation and the ERA-5 analysis data,
and it munges the data to make it suitable for Canonical Correlation Analysis.
It calls scipy.stats library routine to perform the CCA and saves output.
"""

from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dateshift
from datetime import datetime
import sys
import os
from os import path
import numpy.ma as ma
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap
import scipy.stats as stats
from sklearn.cross_decomposition import PLSRegression

# ==================================================================
    
def load_era5(cyear, cmonth, cvar):
    """ read one month of pre-processed ERA5 reanalysis data from a pickle file.

    Per the original author's note, the data are at 4 vertical levels,
    NHem, averaged to 5 degrees.  The file
    'cca/<cyear><cmonth>_<cvar>_era5_5deg.cPick' must hold four consecutive
    pickled objects: the data array, the longitudes, the latitudes and
    the dates.

    Args:
        cyear: year as a string, e.g. '1981'.
        cmonth: month as a string, e.g. '03'.
        cvar: variable name used in the file name, e.g. 'air'.

    Returns:
        (yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat), where
        ntimes, nlevels, ny, nx is the shape of the 4-d input_data array.
    """
    infile = 'cca/'+cyear+cmonth+'_'+cvar+'_era5_5deg.cPick'
    # NOTE(review): pickle files must come from a trusted source.
    # The 'with' block closes the file even when unpickling fails.
    with open(infile,'rb') as inf:
        input_data = cPickle.load(inf)
        lon = cPickle.load(inf)
        lat = cPickle.load(inf)
        yyyymmddhh = cPickle.load(inf)
    ntimes, nlevels, ny, nx = np.shape(input_data)
    return yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat
    
# ==================================================================
    
def convert_to_yyyymmddhh(precipDates):
    """ convert Matt's HUC date array to a list of yyyymmddhh integers.

    The hour is always set to 12, making the assumption that the 7am
    local time is close enough to 12Z.

    Args:
        precipDates: 2-d array whose first three columns are read as
            year, month and day.

    Returns:
        list of integers in yyyymmddhh form, one per row of precipDates.
    """

    def two_digits(value):
        # left-pad single-digit months and days with a zero
        return '0'+str(value) if value < 10 else str(value)

    n_dates, _ = np.shape(precipDates)
    return [
        int(str(precipDates[k,0]) + two_digits(precipDates[k,1])
            + two_digits(precipDates[k,2]) + '12')
        for k in range(n_dates)
    ]
    
# ==================================================================

def compute_n_sampledays(cmonth):
    """ compute the number of days of samples for one calendar month.

    Reads the module-level variable nyears (number of years in the sample).

    Args:
        cmonth: month number as a string or integer, 1 to 12 (e.g. '03').

    Returns:
        total number of days of that month summed over nyears years.

    Raises:
        ValueError: if the month is not in 1..12 (previously this surfaced
            as an UnboundLocalError on the return statement).
    """

    imonth = int(cmonth)
    if imonth in (1, 3, 5, 7, 8, 10, 12):
        return 31*nyears
    if imonth in (4, 6, 9, 11):
        return 30*nyears
    if imonth == 2:
        # hard-wired to 9 leap years in the sample.
        # NOTE(review): that matches 1981-2016; revisit if cyears changes.
        return 9*29 + (nyears-9)*28
    raise ValueError('invalid month! ' + str(cmonth))
    
# ==================================================================

def read_HUC_data():

    """ read in the HUC data provided by Matt Switanek.

    The pickle file holds six consecutive objects, read in this order:
    daily precipitation, dates, HUC centroid latitudes, HUC centroid
    longitudes, HUC boundary shapes and HUC division-4 codes.

    Returns:
        (precipHUCs, ndayhucs, nhucs, precipDates, hucLats, hucLons,
        hucShapes, hucDivision4), where ndayhucs, nhucs is the shape of
        the 2-d precipHUCs array.
    """

    # NOTE(review): pickle files must come from a trusted source.
    # The 'with' block closes the file even when unpickling fails.
    with open("cca/PRISM_pr_hucs_19810101-20180930.pickle",'rb') as f1:
        precipHUCs = cPickle.load(f1) #the daily precip accum in mm (days,hucs)
        precipDates = cPickle.load(f1) #the dates
        hucLats = cPickle.load(f1) #centroid lats of hucs
        hucLons = cPickle.load(f1) #centroid lons of hucs
        hucShapes = cPickle.load(f1) #embedded lists of huc boundaries
        hucDivision4 = cPickle.load(f1) #the div 4 numeric codes of the hucs
    ndayhucs, nhucs = np.shape(precipHUCs)
    return precipHUCs, ndayhucs, nhucs, precipDates, hucLats,\
         hucLons, hucShapes, hucDivision4
        
# ==================================================================

def control_load_era5(cyears, cmonth):
    """ load the ERA5 fields for one calendar month over many years.

    ERA5 was upscaled on Tom Hamill's mac24 in cca directory from
    Cathy Smith data store of ERA5 data, copied to /Public/thamill,
    then ftped to Tom's home computer.

    For every year the 'air', 'shum', 'uwnd' and 'vwnd' files are read with
    load_era5 and the first ny//2 rows of each field are stacked along the
    time axis.  Negative specific humidity is set to zero and the result
    raised to the power 0.3333 before storing.  The stores are sized from
    the module-level variable ndays_total.

    Args:
        cyears: list of year strings.
        cmonth: month string, e.g. '03'.

    Returns:
        (zero_store, temp_store, shum_store, uwnd_store, vwnd_store,
        yyyymmddhh_store, ktr, nx, ny//2), where ktr is the number of
        time samples actually loaded.
    """

    ktr = 0
    for iyear, cyear in enumerate(cyears):

        yyyymmddhh, temp, ntimes, nlevels, ny, nx, lon, lat = \
            load_era5(cyear, cmonth, 'air')
        yyyymmddhh, shum, ntimes, nlevels, ny, nx, lon, lat = \
            load_era5(cyear, cmonth, 'shum')
        yyyymmddhh, uwnd, ntimes, nlevels, ny, nx, lon, lat = \
            load_era5(cyear, cmonth, 'uwnd')
        yyyymmddhh, vwnd, ntimes, nlevels, ny, nx, lon, lat = \
            load_era5(cyear, cmonth, 'vwnd')
        nyhalf = ny//2

        if iyear == 0:
            temp_store = np.zeros((ndays_total,4,nyhalf, nx), dtype=np.float64)
            shum_store = np.zeros((ndays_total,4,nyhalf, nx), dtype=np.float64)
            uwnd_store = np.zeros((ndays_total,4,nyhalf, nx), dtype=np.float64)
            vwnd_store = np.zeros((ndays_total,4,nyhalf, nx), dtype=np.float64)
            yyyymmddhh_store = np.zeros((ndays_total), dtype=np.int32)

        # --- rebuilt for every year: the number of times in a month can
        #     differ between years (February in leap years), and a zero
        #     array sized from the first year would then not match shum.
        zero_store = np.zeros((ntimes, nlevels, ny, nx), dtype=np.float32)
        shum = np.where(shum < 0.0, zero_store, shum)

        temp_store[ktr:ktr+ntimes,:,:,:] = temp[:,:,0:nyhalf,:]
        shum_store[ktr:ktr+ntimes,:,:,:] = shum[:,:,0:nyhalf,:]**0.3333
        uwnd_store[ktr:ktr+ntimes,:,:,:] = uwnd[:,:,0:nyhalf,:]
        vwnd_store[ktr:ktr+ntimes,:,:,:] = vwnd[:,:,0:nyhalf,:]
        yyyymmddhh_store[ktr:ktr+ntimes] = yyyymmddhh[:]
        ktr = ktr + ntimes
    return zero_store, temp_store, shum_store, uwnd_store, \
        vwnd_store, yyyymmddhh_store, ktr, nx, ny//2

# ==================================================================

def standard_normal(data_store,ktr):
    """ computed standardized anomalies, return in same data structure.

    The mean and standard deviation are taken over the whole first (time)
    axis of data_store; the first ktr entries along that axis are then
    replaced in place by (value - mean) / stddev.  Works for arrays of
    any rank, not only the 4-d stores.

    Args:
        data_store: floating-point array whose first axis is time.
        ktr: number of leading time samples to standardize.

    Returns:
        data_store itself, modified in place.  Points with zero standard
        deviation yield nan/inf, as before.
    """

    data_stddev = np.std(data_store, axis=0)
    data_mean = np.mean(data_store, axis=0)
    # a slice is a view, so the in-place operators update data_store
    # without allocating a second full-size array
    used = data_store[0:max(ktr, 0)]
    used -= data_mean
    used /= data_stddev
    return data_store
    
# ==================================================================
       
def munge_predictors_predictand_to_2darray(nx, ny, ktr, nhucs, temp_store, \
    shum_store, uwnd_store, vwnd_store, precipHUCs, yyyymmddhh_hucs, \
    yyyymmddhh_store, ifhour):
    """ rearrange the predictors and predictand into 2-d arrays.

    First index is time, second index is 1-d composite vector of the
    temperature, specific humidity, and u- and v-wind components, each
    flattened to nx*ny*4 values.  For an n.5-day forecast, the HUC data
    offset by +n.5 days is plucked for each initial date and raised to
    the power 0.3333.

    Args:
        nx, ny: horizontal grid dimensions of the stores.
        ktr: number of time samples to use.
        nhucs: number of HUCs (columns of Y).
        temp_store, shum_store, uwnd_store, vwnd_store: predictor arrays
            with time first and nx*ny*4 values per time.
        precipHUCs: 2-d precipitation array indexed [time, huc].
        yyyymmddhh_hucs: sequence of HUC verification dates, in the same
            order as the rows of precipHUCs.
        yyyymmddhh_store: initial-condition dates, one per time sample.
        ifhour: forecast lead passed to dateshift.

    Returns:
        X of shape (ktr, 4*nx*ny*4) and Y of shape (ktr, nhucs).

    Raises:
        ValueError: if a shifted forecast date is not in yyyymmddhh_hucs.
    """

    ngrid = nx*ny*4
    X = np.zeros((ktr, ngrid*4), dtype=np.float64) # 4 as there are four fields
    Y = np.zeros((ktr, nhucs), dtype=np.float64)
    print ('ktr = ', ktr)

    # --- each field fills its own block of ngrid columns; reshaping all
    #     dates at once gives the same row vectors as date-by-date.

    fields = (temp_store, shum_store, uwnd_store, vwnd_store)
    for ifield, field in enumerate(fields):
        X[:,ifield*ngrid:(ifield+1)*ngrid] = \
            np.reshape(field[0:ktr], (ktr, ngrid))

    # --- build the date -> row lookup once instead of scanning the list
    #     for every sample.  setdefault keeps the first occurrence of a
    #     date, which is what list.index returned.

    date_to_row = {}
    for irow, cdate in enumerate(yyyymmddhh_hucs):
        date_to_row.setdefault(cdate, irow)

    for idate in range(ktr):
        fcst_date = int(dateshift(str(yyyymmddhh_store[idate]), ifhour))
        if fcst_date not in date_to_row:
            raise ValueError(str(fcst_date) + ' is not in list')
        Y[idate,:] = precipHUCs[date_to_row[fcst_date],:]**0.3333

    return X, Y

# ==================================================================

def separate_train_validation (X, Y, ixval, nxval, ktr):

    """separate data into training and validation parts
    for cross validation.

    The ktr samples are divided into nxval contiguous folds; fold ixval
    is held out for validation and the rest is used for training.  The
    fold end is computed as the start of the next fold, so every sample
    belongs to exactly one validation fold even when ktr is not a
    multiple of nxval.

    Args:
        X: 2-d predictor array, time first.
        Y: 2-d predictand array, time first.
        ixval: index of the fold to hold out, 0 .. nxval-1.
        nxval: number of folds.
        ktr: number of time samples.

    Returns:
        (Xtrain, Xval, Ytrain, Yval, ntrain, nval, itrain, ival), where
        itrain and ival are lists of the row indices used.
    """

    ixstart = (ktr*ixval) // nxval
    ixend = (ktr*(ixval+1)) // nxval
    ival = [*range(ixstart, ixend)]
    nval = len(ival)
    itrain = [*range(ixstart), *range(ixend, ktr)]
    ntrain = len(itrain)

    Xtrain = X[itrain,:]
    Ytrain = Y[itrain,:]
    Xval = X[ival,:]
    Yval = Y[ival,:]

    return Xtrain, Xval, Ytrain, Yval, ntrain, \
        nval, itrain, ival

# ==================================================================
        
# --- the month and lead time are set in the loops below; reading them
#     from the command line is currently disabled.
#cmonth = sys.argv[1] # 01 to 12
#clead = sys.argv[2] # forecast lead time in days, e.g., 2.5   Use half days so the 00 UTC
cyears = [str(iyear) for iyear in range(1981, 2017)] # '1981' to '2016'
nyears = len(cyears) # read as a global by compute_n_sampledays

#for cmonth in ['01','02','03','04','05','06','07','08','09','10','11']:

# --- perform cross validation over 6 groups of 6 years.

nxval = 6
for cmonth in ['03']:
    ndays_total = compute_n_sampledays(cmonth) # read as a global by control_load_era5
    for clead in ['1.5']:
        ifhour = int(float(clead)*24)

        # ---- everything up to the cross-validation loop is the same for
        #      every fold, so it is done once here rather than being
        #      repeated nxval times.

        # ---- read analyzed precipitation in HUC4 boundaries.

        precipHUCs, ndayhucs, nhucs, precipDates, hucLats,\
            hucLons, hucShapes, hucDivision4 = read_HUC_data()

        # ---- convert the precipitation dates into yyyymmddhh format used w. ERA5 data

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Converting HUC precipitation dates.  Current time is ', current_time)
        yyyymmddhh_hucs = convert_to_yyyymmddhh(precipDates)

        # ---- for the chosen month, load the ERA5 analysis data over the multiple
        #      years

        zero_store, temp_store, shum_store, uwnd_store, vwnd_store, \
            yyyymmddhh_store, ktr, nx, ny = control_load_era5(cyears, cmonth)

        # --- convert to a standard normal deviate.

        temp_store = standard_normal(temp_store, ktr)
        shum_store = standard_normal(shum_store, ktr)
        uwnd_store = standard_normal(uwnd_store, ktr)
        vwnd_store = standard_normal(vwnd_store, ktr)

        X, Y = munge_predictors_predictand_to_2darray \
            (nx, ny, ktr, nhucs, temp_store, \
            shum_store, uwnd_store, vwnd_store, precipHUCs, \
            yyyymmddhh_hucs, yyyymmddhh_store, ifhour)

        # --- standardize Y, the HUC data (already raised to the 0.3333 power)

        Ystd = np.std(Y,axis=0)
        Ymean = np.mean(Y,axis=0)
        Y = (Y - Ymean) / Ystd

        # --- array that collects the validation predictions of all folds

        Ypred_full = np.copy(Y)

        for ixval in range(nxval):

            # --- separate the data into training and validation parts

            Xtrain, Xval, Ytrain, Yval, ntrain, nval, itrain, ival = \
                separate_train_validation (X, Y, ixval, nxval, ktr)

            # --- perform the partial-least-squares regression on the
            #     training data.

            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            print ("Starting PLSR.  Current time = ", current_time)
            plsr = PLSRegression(n_components=160, scale=False)
            plsr.fit(Xtrain,Ytrain)
            Ypred = plsr.predict(Xval)
            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            print ("Finished PLSR.  Current time = ", current_time)

            # ---- reconstitute the full vector of forecast data, including
            #      back transformation to original space.

            print ('np.shape(Ypred_full[ival,:]) = ', np.shape(Ypred_full[ival,:]))
            print ('np.shape(Ypred[:,:]) = ', np.shape(Ypred[:,:]))
            print ('np.shape(Ystd[:]) = ', np.shape(Ystd[:]))
            print ('np.shape(Ymean[:]) = ', np.shape(Ymean[:]))

            Ypred_full[ival,:] = (Ypred[:,:]*Ystd[:] + Ymean[:])**3.0

        # ---- save cross-validated predicted precipitation forecasts to file

        outfile = 'cca/PLSR_regression_data_month='+cmonth+'_lead='+clead+'days.cPick'
        print ('writing to ', outfile)
        with open(outfile, 'wb') as ouf:
            cPickle.dump(Ypred_full, ouf)
            cPickle.dump(yyyymmddhh_store, ouf)
                                                                                                                                                                                                                                                                                                                            ._cca_xval_2deg.py                                                                                  000775  000765  000024  00000000417 14016263143 014237  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    H    386:4814674                                                                                                                                                                                                                                                 cca_xval_2deg.py                                                                                    000775  000765  000024  00000040464 14016263143 014030  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
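cca_xval_2deg.py: for the month and forecast lead time set in the loops at the
bottom of this file, the program loads the HUC precipitation and the 2-degree
ERA-5 analysis data (the chosen month plus the months before and after) and
munges them into 2-d predictor (X) and predictand (Y) arrays.  It then performs a
cross-validated partial-least-squares regression with scikit-learn's
PLSRegression and saves the back-transformed predictions.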
"""

from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dateshift
from datetime import datetime
import sys
import os
from os import path
import numpy.ma as ma
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap
import scipy.stats as stats
from sklearn.cross_decomposition import PLSRegression

# ==================================================================
    
def load_era5(cyear, cmonth, cvar):
    """ read one month of multi-level ERA5 reanalysis data from a pickle file.

    Per the original author's note, the data are at 4 vertical levels,
    NHem, averaged to 2 degrees.  The file
    'cca/<cyear><cmonth>_<cvar>_era5_2deg.cPick' must hold four consecutive
    pickled objects: the data array, the longitudes, the latitudes and
    the dates.

    Args:
        cyear: year as a string, e.g. '1981'.
        cmonth: month as a string, e.g. '03'.
        cvar: variable name used in the file name, e.g. 'air'.

    Returns:
        (yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat), where
        ntimes, nlevels, ny, nx is the shape of the 4-d input_data array.
    """
    infile = 'cca/'+cyear+cmonth+'_'+cvar+'_era5_2deg.cPick'
    # NOTE(review): pickle files must come from a trusted source.
    # The 'with' block closes the file even when unpickling fails.
    with open(infile,'rb') as inf:
        input_data = cPickle.load(inf)
        lon = cPickle.load(inf)
        lat = cPickle.load(inf)
        yyyymmddhh = cPickle.load(inf)
    ntimes, nlevels, ny, nx = np.shape(input_data)
    return yyyymmddhh, input_data, ntimes, nlevels, ny, nx, lon, lat

# ==================================================================
    
def load_era5_monolevel(cyear, cmonth, cvar):

    """ read one month of single-level ERA5 reanalysis data from a pickle file.

    Per the original author's note, the data are NHem, averaged to
    2 degrees.  The file 'cca/<cyear><cmonth>_<cvar>_era5_2deg.cPick' must
    hold four consecutive pickled objects: the data array, the
    longitudes, the latitudes and the dates.

    Args:
        cyear: year as a string, e.g. '1981'.
        cmonth: month as a string, e.g. '03'.
        cvar: variable name used in the file name, e.g. 'prmsl'.

    Returns:
        (yyyymmddhh, input_data, ntimes, ny, nx, lon, lat), where
        ntimes, ny, nx is the shape of the 3-d input_data array.
    """
    infile = 'cca/'+cyear+cmonth+'_'+cvar+'_era5_2deg.cPick'
    # NOTE(review): pickle files must come from a trusted source.
    # The 'with' block closes the file even when unpickling fails.
    with open(infile,'rb') as inf:
        input_data = cPickle.load(inf)
        lon = cPickle.load(inf)
        lat = cPickle.load(inf)
        yyyymmddhh = cPickle.load(inf)
    ntimes, ny, nx = np.shape(input_data)
    return yyyymmddhh, input_data, ntimes,  ny, nx, lon, lat
    
# ==================================================================
    
def convert_to_yyyymmddhh(precipDates):
    """ convert Matt's HUC date array to a list of yyyymmddhh integers.

    The hour is always set to 12, making the assumption that the 7am
    local time is close enough to 12Z.

    Args:
        precipDates: 2-d array whose first three columns are read as
            year, month and day.

    Returns:
        list of integers in yyyymmddhh form, one per row of precipDates.
    """

    n_rows, _ = np.shape(precipDates)
    stamps = []
    for row in range(n_rows):
        pieces = [str(precipDates[row,0])]
        # month and day are zero-padded to two characters
        for col in (1, 2):
            value = precipDates[row,col]
            text = str(value)
            if value < 10:
                text = '0'+text
            pieces.append(text)
        pieces.append('12')
        stamps.append(int(''.join(pieces)))
    return stamps
    
# ==================================================================

def compute_n_sampledays(cmonth):
    """ compute the number of days of samples for one calendar month.

    Reads the module-level variable nyears (number of years in the sample).

    Args:
        cmonth: month number as a string or integer, 1 to 12 (e.g. '03').

    Returns:
        total number of days of that month summed over nyears years.

    Raises:
        ValueError: if the month is not in 1..12 (previously this surfaced
            as an UnboundLocalError on the return statement).
    """

    days_in_month = {1: 31, 3: 31, 5: 31, 7: 31, 8: 31, 10: 31, 12: 31,
                     4: 30, 6: 30, 9: 30, 11: 30}
    imonth = int(cmonth)
    if imonth == 2:
        # hard-wired to 9 leap years in the sample.
        # NOTE(review): that matches 1981-2016; revisit if cyears changes.
        return 9*29 + (nyears-9)*28
    if imonth not in days_in_month:
        raise ValueError('invalid month! ' + str(cmonth))
    return days_in_month[imonth]*nyears
    
# ==================================================================

def read_HUC_data():

    """ read in the HUC data provided by Matt Switanek.

    The pickle file holds six consecutive objects, read in this order:
    daily precipitation, dates, HUC centroid latitudes, HUC centroid
    longitudes, HUC boundary shapes and HUC division-4 codes.

    Returns:
        (precipHUCs, ndayhucs, nhucs, precipDates, hucLats, hucLons,
        hucShapes, hucDivision4), where ndayhucs, nhucs is the shape of
        the 2-d precipHUCs array.
    """

    # NOTE(review): pickle files must come from a trusted source.
    # The 'with' block closes the file even when unpickling fails.
    with open("cca/PRISM_pr_hucs_19810101-20180930.pickle",'rb') as f1:
        precipHUCs = cPickle.load(f1) #the daily precip accum in mm (days,hucs)
        precipDates = cPickle.load(f1) #the dates
        hucLats = cPickle.load(f1) #centroid lats of hucs
        hucLons = cPickle.load(f1) #centroid lons of hucs
        hucShapes = cPickle.load(f1) #embedded lists of huc boundaries
        hucDivision4 = cPickle.load(f1) #the div 4 numeric codes of the hucs
    ndayhucs, nhucs = np.shape(precipHUCs)
    return precipHUCs, ndayhucs, nhucs, precipDates, hucLats,\
         hucLons, hucShapes, hucDivision4
        
# ==================================================================        
        
def set_before_after(cmonth):
    """ return the two-character months before and after cmonth.

    Args:
        cmonth: month as a two-character string, '01' to '12'.

    Returns:
        (cmonth_before, cmonth_after); the calendar wraps, so January is
        preceded by '12' and December is followed by '01'.  For any other
        input a message is printed and the program exits.
    """
    months = ['01', '02', '03', '04', '05', '06',
              '07', '08', '09', '10', '11', '12']
    if cmonth not in months:
        print ('invalid month! ', cmonth)
        sys.exit()

    imonth = months.index(cmonth)
    cmonth_before = months[imonth-1] # index -1 wraps January back to December
    cmonth_after = months[(imonth+1) % len(months)]
    return cmonth_before, cmonth_after

# ==================================================================

def control_load_era5(cyears, cmonth, cmonth_before, cmonth_after, ndays_total):
    """ load the ERA5 fields for three calendar months over many years.

    ERA5 was upscaled on Tom Hamill's mac24 in cca directory from
    Cathy Smith data store of ERA5 data, copied to /Public/thamill,
    then ftped to Tom's home computer.

    For every year, and within each year for cmonth, cmonth_before and
    cmonth_after in that order, the four multi-level fields and the two
    single-level fields are read and the first ny//2 rows of each are
    stacked along the time axis.  Negative specific humidity is set to
    zero and the result raised to the power 0.3333 before storing.

    Args:
        cyears: list of year strings.
        cmonth, cmonth_before, cmonth_after: month strings, e.g. '03'.
        ndays_total: size of the time axis of the stores.

    Returns:
        (zero_store, temp_store, shum_store, uwnd_store, vwnd_store,
        mslp_store, pwat_store, yyyymmddhh_store, ktr, nx, ny//2), where
        ktr is the number of time samples actually loaded.
    """

    ktr = 0
    for iyear, cyear in enumerate(cyears):

        for cm in (cmonth, cmonth_before, cmonth_after):

            # --- multi-level fields; the number of levels is taken from
            #     the last of them

            temp = load_era5(cyear, cm, 'air')[1]
            shum = load_era5(cyear, cm, 'shum')[1]
            uwnd = load_era5(cyear, cm, 'uwnd')[1]
            _, vwnd, _, nlevels, _, _, _, _ = load_era5(cyear, cm, 'vwnd')

            # --- single-level fields; dates and grid sizes are taken
            #     from the last file read

            mslp = load_era5_monolevel(cyear, cm, 'prmsl')[1]
            yyyymmddhh, pwat, ntimes, ny, nx, _, _ = \
                load_era5_monolevel(cyear, cm, 'pr_wtr')
            nyhalf = ny//2

            zero_store = np.zeros((ntimes, nlevels, ny, nx), dtype=np.float32)
            if iyear == 0 and cm == cmonth:
                # --- allocate the stores once the grid size is known
                shape_4d = (ndays_total, 4, nyhalf, nx)
                shape_3d = (ndays_total, nyhalf, nx)
                temp_store = np.zeros(shape_4d, dtype=np.float64)
                shum_store = np.zeros(shape_4d, dtype=np.float64)
                uwnd_store = np.zeros(shape_4d, dtype=np.float64)
                vwnd_store = np.zeros(shape_4d, dtype=np.float64)
                mslp_store = np.zeros(shape_3d, dtype=np.float64)
                pwat_store = np.zeros(shape_3d, dtype=np.float64)
                yyyymmddhh_store = np.zeros((ndays_total), dtype=np.int32)

            shum = np.where(shum < 0.0, zero_store, shum)

            kend = ktr + ntimes
            temp_store[ktr:kend] = temp[:,:,:nyhalf,:]
            shum_store[ktr:kend] = shum[:,:,:nyhalf,:]**0.3333
            uwnd_store[ktr:kend] = uwnd[:,:,:nyhalf,:]
            vwnd_store[ktr:kend] = vwnd[:,:,:nyhalf,:]
            mslp_store[ktr:kend] = mslp[:,:nyhalf,:]
            pwat_store[ktr:kend] = pwat[:,:nyhalf,:]
            yyyymmddhh_store[ktr:kend] = yyyymmddhh[:]
            ktr = kend

    return zero_store, temp_store, shum_store, uwnd_store, \
        vwnd_store, mslp_store, pwat_store, yyyymmddhh_store, \
        ktr, nx, ny//2

# ==================================================================

def standard_normal(data_store,ktr):
    """ computed standardized anomalies, return in same data structure.

    The mean and standard deviation are taken over the whole first (time)
    axis of data_store; the first ktr entries along that axis are then
    replaced in place by (value - mean) / stddev.  Broadcasting handles
    the 4-d (multi-level) and 3-d (single-level) stores alike, and any
    other rank.

    Args:
        data_store: floating-point array whose first axis is time.
        ktr: number of leading time samples to standardize.

    Returns:
        data_store itself, modified in place.  Points with zero standard
        deviation yield nan/inf, as before.
    """

    data_stddev = np.std(data_store, axis=0)
    data_mean = np.mean(data_store, axis=0)
    # a slice is a view, so the in-place operators update data_store
    # without allocating a second full-size array
    used = data_store[0:max(ktr, 0)]
    used -= data_mean
    used /= data_stddev
    return data_store
    
# ==================================================================
       
def munge_predictors_predictand_to_2darray(nx, ny, ktr, nhucs, \
    hucnumber, temp_store, shum_store, uwnd_store, vwnd_store, \
    mslp_store, pwat_store, precipHUCs, yyyymmddhh_hucs, \
    yyyymmddhh_store, ifhour):
    """ rearrange the predictors and predictand into 2-d arrays.

    First index is time, second index is 1-d composite vector of the
    temperature, specific humidity, u- and v-wind components (nx*ny*4
    values each) followed by the two single-level fields (nx*ny values
    each).  For an n.5-day forecast, the HUC data offset by +n.5 days is
    plucked for each initial date and raised to the power 0.3333.

    Args:
        nx, ny: horizontal grid dimensions of the stores.
        ktr: number of time samples to use.
        nhucs: number of HUCs (columns of Y).
        hucnumber: not used here; kept so existing calls keep working.
        temp_store, shum_store, uwnd_store, vwnd_store: predictor arrays
            with time first and nx*ny*4 values per time.
        mslp_store, pwat_store: predictor arrays with time first and
            nx*ny values per time.
        precipHUCs: 2-d precipitation array indexed [time, huc].
        yyyymmddhh_hucs: sequence of HUC verification dates, in the same
            order as the rows of precipHUCs.
        yyyymmddhh_store: initial-condition dates, one per time sample.
        ifhour: forecast lead passed to dateshift.

    Returns:
        X of shape (ktr, 4*nx*ny*4 + 2*nx*ny) and Y of shape (ktr, nhucs).

    Raises:
        ValueError: if a shifted forecast date is not in yyyymmddhh_hucs.
    """

    nsfc = nx*ny
    ngrid = nsfc*4
    X = np.zeros((ktr, ngrid*4 + 2*nsfc), dtype=np.float64) # four 4-level fields, two single-level
    Y = np.zeros((ktr, nhucs), dtype=np.float64)

    # --- each field fills its own block of columns, in this order;
    #     reshaping all dates at once gives the same row vectors as
    #     date-by-date.

    blocks = ((temp_store, ngrid), (shum_store, ngrid), (uwnd_store, ngrid), \
        (vwnd_store, ngrid), (mslp_store, nsfc), (pwat_store, nsfc))
    icol = 0
    for field, width in blocks:
        X[:,icol:icol+width] = np.reshape(field[0:ktr], (ktr, width))
        icol = icol + width

    # --- build the date -> row lookup once instead of scanning the list
    #     for every sample.  setdefault keeps the first occurrence of a
    #     date, which is what list.index returned.

    date_to_row = {}
    for irow, cdate in enumerate(yyyymmddhh_hucs):
        date_to_row.setdefault(cdate, irow)

    for idate in range(ktr):
        fcst_date = int(dateshift(str(yyyymmddhh_store[idate]), ifhour))
        if fcst_date not in date_to_row:
            raise ValueError(str(fcst_date) + ' is not in list')
        Y[idate,:] = precipHUCs[date_to_row[fcst_date],:]**0.3333

    return X, Y

# ==================================================================

def separate_train_validation (X, Y, ixval, nxval, ktr):

    """separate data into training and validation parts
    for cross validation.

    The ktr samples are divided into nxval contiguous folds; fold ixval
    is held out for validation and the rest is used for training.  The
    fold end is computed as the start of the next fold, so every sample
    belongs to exactly one validation fold even when ktr is not a
    multiple of nxval (with the former ixstart + ktr//nxval the last
    sample of some folds was never validated).

    Args:
        X: 2-d predictor array, time first.
        Y: 2-d predictand array, time first.
        ixval: index of the fold to hold out, 0 .. nxval-1.
        nxval: number of folds.
        ktr: number of time samples.

    Returns:
        (Xtrain, Xval, Ytrain, Yval, ntrain, nval, itrain, ival), where
        itrain and ival are lists of the row indices used.
    """

    ixstart = (ktr*ixval) // nxval
    ixend = (ktr*(ixval+1)) // nxval
    ival = [*range(ixstart, ixend)]
    nval = len(ival)
    itrain = [*range(ixstart), *range(ixend, ktr)]
    ntrain = len(itrain)
    Xtrain = X[itrain,:]
    Ytrain = Y[itrain,:]
    Xval = X[ival,:]
    Yval = Y[ival,:]
    return Xtrain, Xval, Ytrain, Yval, ntrain, \
        nval, itrain, ival

# ==================================================================
        
# --- the month and lead time are set in the loops below; reading them
#     from the command line is currently disabled.
#cmonth = sys.argv[1] # 01 to 12
#clead = sys.argv[2] # forecast lead time in days, e.g., 2.5   Use half days so the 00 UTC
cyears = \
    ['1981', '1982', '1983', '1984','1985', '1986', \
    '1987', '1988', '1989', '1990', '1991', '1992', \
    '1993', '1994', '1995', '1996', '1997', '1998', \
    '1999', '2000', '2001', '2002', '2003', '2004', \
    '2005', '2006', '2007', '2008', '2009', '2010', \
    '2011', '2012', '2013', '2014', '2015', '2016'] # , '2017']
nyears = len(cyears) # read as a global by compute_n_sampledays

#for cmonth in ['01','02','03','04','05','06','07','08','09','10','11']:

# --- perform cross validation over 6 groups of 6 years.

nxval = 6
hucnumber = 112 # HUC column printed in the diagnostic output below
for cmonth in ['03']:

    # --- the sample holds the chosen month plus the months before and after

    cmonth_before, cmonth_after = set_before_after(cmonth)
    nc = compute_n_sampledays(cmonth)
    na = compute_n_sampledays(cmonth_after)
    nb = compute_n_sampledays(cmonth_before)
    ndays_total = na+nb+nc
    for clead in ['1.5']:
        ifhour = int(float(clead)*24)

        # ---- read analyzed precipitation in HUC4 boundaries.

        precipHUCs, ndayhucs, nhucs, precipDates, hucLats,\
            hucLons, hucShapes, hucDivision4 = read_HUC_data()

        # ---- convert the precipitation dates into yyyymmddhh format used w. ERA5 data

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ('Converting HUC precipitation dates.  Current time is ', current_time)
        yyyymmddhh_hucs = convert_to_yyyymmddhh(precipDates)

        # ---- for the chosen month, load the ERA5 analysis data over the multiple
        #      years

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ("Loading ERA5 data.  Current time = ", current_time)
        zero_store, temp_store, shum_store, uwnd_store, vwnd_store, mslp_store, \
            pwat_store, yyyymmddhh_store, ktr, nx, ny = \
            control_load_era5(cyears, cmonth, cmonth_before, cmonth_after, ndays_total)

        # ---- convert the predictors to standardized anomalies

        temp_store = standard_normal(temp_store, ktr)
        shum_store = standard_normal(shum_store, ktr)
        uwnd_store = standard_normal(uwnd_store, ktr)
        vwnd_store = standard_normal(vwnd_store, ktr)
        mslp_store = standard_normal(mslp_store, ktr)
        pwat_store = standard_normal(pwat_store, ktr)

        # ---- reshape arrays into predictors (X) and predictand (Y).   Y is
        #      exponentiated (**0.3333)

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ("Calling munge_predictors_predictand_to_2darray .  Current time = ", current_time)
        X, Y = munge_predictors_predictand_to_2darray \
            (nx, ny, ktr, nhucs, hucnumber, temp_store, \
            shum_store, uwnd_store, vwnd_store, mslp_store, \
            pwat_store, precipHUCs, yyyymmddhh_hucs, \
            yyyymmddhh_store, ifhour)

        # --- convert to a standard normal deviate. apply standard deviation to Y, HUC data

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print ("Computing mean and std dev.  Current time = ", current_time)
        Ystd = np.std(Y,axis=0)
        Ymean = np.mean(Y,axis=0)
        Y = (Y - Ymean) / Ystd

        # --- array that collects the validation predictions of all folds

        Ypred_full = np.copy(Y)

        for ixval in range(nxval):

            print ('**** performing cross validation ',ixval,' of ',nxval)

            # --- separate the data into training and validation parts. itrain
            #     and ival contain the associated indices of training, validation data
            #     within the X, Y arrays

            print ('calling separate train_validation, ktr = ', ktr)
            print ('np.shape(X) = ', np.shape(X))
            print ('np.shape(Y) = ', np.shape(Y))
            Xtrain, Xval, Ytrain, Yval, ntrain, nval, itrain, ival = \
                separate_train_validation (X, Y, ixval, nxval, ktr)

            # --- perform the partial-least-squares regression on the
            #     training data.

            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            print ("Starting PLSR.  Current time = ", current_time)
            plsr = PLSRegression(n_components=40, scale=False) # , max_iter=150)
            plsr.fit(Xtrain,Ytrain)
            Ypred = plsr.predict(Xval)
            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            print ("Finished PLSR.  Current time = ", current_time)

            # ---- reconstitute the full vector of forecast data, including
            #      back transformation to original space.  Back out the
            #      indexing for cross_validation

            Ypred_full[ival,:] = (Ypred[:,:]*Ystd[:] + Ymean[:])**3.0

        # ---- diagnostic print of the predictions for hucnumber in a
        #      5-sample window around one date.  The window is clipped to
        #      the array bounds so that a match near either end can
        #      neither wrap around nor stop the run before the output
        #      is saved.

        ndates = len(yyyymmddhh_store)
        for i in range(ndates):
            if yyyymmddhh_store[i] == 2003031700:
                for j in range(max(i-2, 0), min(i+3, ndates)):
                    print (j,yyyymmddhh_store[j], Ypred_full[j,hucnumber])

        # ---- save cross-validated predicted precipitation forecasts to file

        outfile = 'cca/PLSR_regression_data_month='+cmonth+'_lead='+clead+'days.cPick'
        print ('writing to ', outfile)
        with open(outfile, 'wb') as ouf:
            cPickle.dump(Ypred_full, ouf)
            cPickle.dump(yyyymmddhh_store, ouf)
                                                                                                                                                                                                                  ._ccpa_to_netCDF_original.py                                                                        000775  000765  000024  00000000415 14016263144 016234  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    6P    82:461262                                                                                                                                                                                                                                                   ccpa_to_netCDF_original.py                                                                          000775  000765  000024  00000015037 14016263144 016025  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """

python ccpa_to_netCDF.py cmonth

For chosen month (01 to 12), extract grib files of CCPA 
on CONUS NDFD grid and save to a new netCDF file.

Tom Hamill, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import scipy.stats as stats
import pygrib
from netCDF4 import Dataset
from dateutils import hrs_since_day1CE_todate, \
    dateto_hrs_since_day1CE, hrstodate, datetohrs, dateshift
from mpl_toolkits.basemap import Basemap, interp

# ---- get the month from the command line

cmonth = sys.argv[1] # 01 etc
imonth = int(cmonth) - 1  # zero-based index into the days-per-month tables

# Days per month for ordinary and leap years.  February must differ between
# the two tables (28 vs 29); with identical tables Feb 29 is silently
# skipped whenever a leap year is processed.
daysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
daysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
day1900 = datetime(1900,1,1,0)
hours1900 = dateto_hrs_since_day1CE(day1900, mixedcal=True)


# ---- Read the latitude/longitude arrays of the output NDFD CONUS grid
#      from a sample CCPA GRIB2 file.  Rows are reversed, if necessary,
#      so that the arrays run south to north.

infile = '/Volumes/Backup Plus/ccpa/ccpa.20180101/00/ccpa.t00z.06h.ndgd2p5.conus.gb2'
print (infile)
flatlon = pygrib.open(infile)
fcst = flatlon.select()[0]   # first GRIB message in the file
lats_ndfd, lons_ndfd = fcst.latlons()

# flipud records whether the rows were reversed; it is a module-level
# flag that later code consults when reading precipitation fields.
flipud = bool(lats_ndfd[0,0] > lats_ndfd[-1,0])
if flipud:
    lats_ndfd, lons_ndfd = np.flipud(lats_ndfd), np.flipud(lons_ndfd)
nlats_ndfd, nlons_ndfd = np.shape(lons_ndfd)
flatlon.close()
print ('min, max lons_ndfd = ', np.min(lons_ndfd), np.max(lons_ndfd))

# grid of zeros with the NDFD grid shape
zeros = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float32)


# ---- Read the CONUS mask.  (Original author's caveat: accuracy of this
#      mask is uncertain.)

infile = '/Volumes/Backup Plus/ccpa/supplemental_locations_ndfd2p5_Jan.nc'
nc = Dataset(infile)
conusmask_in = nc.variables['conusmask'][:,:]
nc.close()

# ---- process all years for this month.  For each year one netCDF file is
#      written holding every available 6-hourly CCPA analysis of the month.

for iyear in range(2002,2020):
    cyear = str(iyear)
    
    print ('****** processing year = ', iyear)

    # ---- determine the days of the month.  The divisible-by-4 test is a
    #      sufficient leap-year check for the 2002-2019 range used here.
    if iyear%4 == 0:
        ndays = daysomo_leap[imonth]
    else:
        ndays = daysomo[imonth]

    # ---- open netCDF output file and define its dimensions and variables.
     
    outfile = '../ccpa/'+cyear+cmonth+'_ccpa_on_ndfd_grid_6hourly.nc'
    print ('   writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','f4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','f4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    time = ncout.createDimension('time',None)   # unlimited dimension
    timev = ncout.createVariable('time','f4',('time',))
    timev.units = "index to time dimension, that's all"

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"
    
    # The mask attributes must be set on conusmask itself; setting them
    # on latsa would overwrite the latitude metadata assigned just above.
    conusmask = ncout.createVariable('conusmask','i4',('yf','xf',))
    conusmask.long_name = "mask (1=land, 0=water)"
    conusmask.units = "none"

    yyyymmddhh_begin = ncout.createVariable('yyyymmddhh_begin','i4',('time',))
    yyyymmddhh_begin.long_name = \
        "Precip accumulation period beginning in yyyymmddhh format"

    yyyymmddhh_end = ncout.createVariable('yyyymmddhh_end','i4',('time',))
    yyyymmddhh_end.long_name = \
        "Precip accumulation period ending in yyyymmddhh format"

    # --- declare the single-level variable information on lat-lon grid

    apcp_anal = ncout.createVariable('apcp_anal','f4',('time','yf','xf',),
        zlib=True,least_significant_digit=4)
    apcp_anal.units = "mm"
    apcp_anal.long_name = \
        "Interpolated 6-h accumulated CCPA analysis on CONUS NDFD grid"
    apcp_anal.valid_range = [0.,1000.]
    apcp_anal.missing_value = np.array(-9999.99,dtype=np.float32)

    # ---- initialize the coordinate and static fields

    xvf[:] = np.arange(nlons_ndfd)
    yvf[:] = np.arange(nlats_ndfd)
    lonsa[:] = lons_ndfd[:,:]
    latsa[:] = lats_ndfd[:,:]
    conusmask[:] = conusmask_in[:,:]

    # ---- metadata

    ncout.title = "NDFD CONUS domain interpolated from CCPA, 6 hourly accum."
    ncout.history = "Interpolated CCPA provided by Yan Luo, NCEP/EMC, Dec 2020"
    ncout.institution =  "NCEP/EMC"
    ncout.platform = "Precipitation analysis"
    ncout.references = "DOI: 10.1175/JHM-D-11-0140.1"

    # ---- loop thru all dates, read the analyses, and munge them into netCDF...

    ktr = 0   # index of the next record along the time dimension
    for iday in range(1,ndays+1):
        
        cday = str(iday).zfill(2)
        cyyyymmdd = cyear + cmonth + cday
        for chour in ['00','06','12','18',]:

            # chour marks the end of the accumulation period; the beginning
            # is obtained by shifting the end date by -6 via dateshift.
            cyyyymmddhh_end = cyear + cmonth + cday + chour
            print (cyyyymmddhh_end)
            cyyyymmddhh_begin = dateshift(cyyyymmddhh_end, -6)
            print (cyyyymmddhh_begin, cyyyymmddhh_end)
        
            # --- read the 6-hourly CCPA file. Make sure none subzero.
            
            infile = '/Volumes/Backup Plus/ccpa/ccpa.'+cyyyymmdd+\
                '/'+chour+'/ccpa.t'+chour+'z.06h.ndgd2p5.conus.gb2'
            print (infile)
            try:
                grb = pygrib.open(infile)
                panal = grb.select()[0]
                precip_ccpa = panal.values
                grb.close()
                precip_ccpa = np.where(precip_ccpa < 0.0, zeros, precip_ccpa)
                if flipud == True:
                    precip_ccpa = np.flipud(precip_ccpa)
                    
                # ---- save to netCDF file.  ktr only advances on success,
                #      so a failed date leaves no gap in the time dimension.

                timev[ktr]             = ktr  
                yyyymmddhh_begin[ktr]  = int(cyyyymmddhh_begin)
                yyyymmddhh_end[ktr]    = int(cyyyymmddhh_end)
                apcp_anal[ktr]         = precip_ccpa
                ktr = ktr + 1
                
            except Exception as err:
                # Best effort: skip dates that cannot be read or written,
                # but report why and let Ctrl-C / SystemExit propagate.
                print ('whoops!   some problem with ', infile, ':', err)

    ncout.close()






                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 ._ccpa_to_netcdf.py                                                                                 000775  000765  000024  00000000416 14074365457 014527  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    Q    209:809141                                                                                                                                                                                                                                                  ccpa_to_netcdf.py                                                                                   000775  000765  000024  00000026527 14074365457 014325  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """

python ccpa_to_netcdf.py cmonth

For chosen month (01 to 12), extract grib files of CCPA and MSWEP
on CONUS NDFD grid and merge and then save to a new netCDF file.

Tom Hamill, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import scipy.stats as stats
import pygrib
from netCDF4 import Dataset
from dateutils import hrs_since_day1CE_todate, \
    dateto_hrs_since_day1CE, hrstodate, datetohrs, dateshift
from mpl_toolkits.basemap import Basemap, interp
from merge_precipitation_analyses_f90 import \
    merge_precipitation_analyses_f90

# ---- get the month from the command line.  The first 00
#      hour analysis of the month will need to access the data from
#      the previous month.

cmonth = sys.argv[1] # 01 etc
imonth = int(cmonth) - 1  # zero-based index into the days-per-month tables

# two-digit string of the preceding calendar month (January wraps to '12')
if cmonth == '01':
    cmonth_before = '12'
else:
    imonth_before = int(cmonth)-1
    cmonth_before = str(imonth_before).zfill(2)
        
# Days per month for ordinary and leap years.  February must differ between
# the two tables (28 vs 29); with identical tables Feb 29 is silently
# skipped whenever a leap year is processed.
# NOTE(review): the monthly MSWEP input files must then also contain the
# Feb 29 records -- confirm against the script that produced them.
daysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
daysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
day1900 = datetime(1900,1,1,0)
hours1900 = dateto_hrs_since_day1CE(day1900, mixedcal=True)

# ---- Read the latitude/longitude arrays of the output NDFD CONUS grid
#      from a sample CCPA GRIB2 file.  Rows are reversed, if necessary,
#      so that the arrays run south to north.

infile = '/Volumes/Backup Plus/ccpa/ccpa.20180101/00/'+\
    'ccpa.t00z.06h.ndgd2p5.conus.gb2'
print (infile)
flatlon = pygrib.open(infile)
fcst = flatlon.select()[0]   # first GRIB message in the file
lats_ndfd, lons_ndfd = fcst.latlons()

# flipud records whether the rows were reversed.
flipud = bool(lats_ndfd[0,0] > lats_ndfd[-1,0])
if flipud:
    lats_ndfd, lons_ndfd = np.flipud(lats_ndfd), np.flipud(lons_ndfd)
nlats_ndfd, nlons_ndfd = np.shape(lons_ndfd)
flatlon.close()
print ('min, max lons_ndfd = ', np.min(lons_ndfd), \
    np.max(lons_ndfd))

# constant helper grids with the NDFD grid shape
grid_shape = (nlats_ndfd, nlons_ndfd)
zeros = np.zeros(grid_shape, dtype=np.float32)
ones = np.ones(grid_shape, dtype=np.float32)

# --- read in the land-water mask.  Grid points whose value in the
#     terrain/land-water file exceeds 1.0 are set to 1, all others to 0.

infile = 'ndfd_terrain_landwater.grib2.gb2'
print (infile)
flatlon = pygrib.open(infile)
fcst = flatlon.select()[0]
landmask = fcst.values
landmask = np.where(landmask > 1.0, ones, zeros)
lats_landmask, lons_landmask = fcst.latlons()
flatlon.close()

# Use a dedicated orientation flag for this file.  Re-using the global
# `flipud` here would overwrite the orientation determined for the CCPA
# grid, which is consulted later when CCPA precipitation is read.
# When the file is stored north to south, the mask values themselves are
# reversed together with their coordinates so they stay co-located and
# match the south-to-north orientation used for the other fields.
flipud_landmask = bool(lats_landmask[0,0] > lats_landmask[-1,0])
if flipud_landmask:
    landmask = np.flipud(landmask)
    lats_landmask = np.flipud(lats_landmask)
    lons_landmask = np.flipud(lons_landmask)
nlats_landmask, nlons_landmask = np.shape(lons_landmask)

# --- Read the mask of valid CCPA points (masks obtained from Eric Engle,
#     MDL, per the original author).  It still contains Gulf and Atlantic
#     points that are not wanted; those are removed by multiplying with
#     the land mask below.

infile = '../ccpa/various_nbm_plus_mask.nc'
nc = Dataset(infile)
lats_ccpa, lons_ccpa, validmask_ccpa = (
    nc.variables[varname][:,:]
    for varname in ('latitude', 'longitude', 'validmask_ccpa'))
nc.close()

# reverse the mask rows when the file is stored north to south
if lats_ccpa[0,0] > lats_ccpa[-1,0]:
    validmask_ccpa = np.flipud(validmask_ccpa)

# ---- The final mask is the product of the valid-CCPA mask and the land
#      mask, so every water point is zero and will default to the
#      alternative MSWEP analysis.

finalmask = validmask_ccpa*landmask

# ---- Inspect the latitudes of one MSWEP file to learn whether MSWEP
#      fields are stored north to south and therefore need flipping.

mswep_directory = '/Volumes/Backup Plus/mswep/'
infile = mswep_directory + '200001_on_ndfd_grid_6hourly.nc'
nc = Dataset(infile)
lats_mswep = nc.variables['lats'][:,:]
flipud_mswep = bool(lats_mswep[0,0] > lats_mswep[-1,0])
nc.close()

# grid filled with the -99.99 missing-data flag
mninenine = np.ones((nlats_ndfd, nlons_ndfd), dtype=np.float64) * -99.99

# ---- process all years for this month.  For each year one netCDF file is
#      written with the merged CCPA/MSWEP 6-hourly analyses of the month.

for iyear in range(2002,2020):
    
    cyear = str(iyear)
    
    print ('****** processing year = ', iyear)
    
    # ---- determine the days of the month.  The divisible-by-4 test is a
    #      sufficient leap-year check for the 2002-2019 range used here.
    if iyear%4 == 0:
        ndays = daysomo_leap[imonth]
    else:
        ndays = daysomo[imonth]
    

    # ---- open netCDF output file and define its dimensions and variables.
     
    outfile = '/Volumes/NBM/conus_panal/'+cyear+cmonth+'_ccpa_on_ndfd_grid_6hourly.nc'
    print ('   writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','f4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','f4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    time = ncout.createDimension('time',None)   # unlimited dimension
    timev = ncout.createVariable('time','f4',('time',))
    timev.units = "index to time dimension, that's all"

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"
    
    # The mask attributes must be set on conusmask itself; setting them
    # on latsa would overwrite the latitude metadata assigned just above.
    conusmask = ncout.createVariable('conusmask','i4',('yf','xf',))
    conusmask.long_name = "mask (1=land, 0=water)"
    conusmask.units = "none"

    yyyymmddhh_begin = ncout.createVariable('yyyymmddhh_begin','i4',('time',))
    yyyymmddhh_begin.long_name = \
        "Precip accumulation period beginning in yyyymmddhh format"

    yyyymmddhh_end = ncout.createVariable('yyyymmddhh_end','i4',('time',))
    yyyymmddhh_end.long_name = \
        "Precip accumulation period ending in yyyymmddhh format"

    # --- declare the single-level variable information on lat-lon grid

    apcp_anal = ncout.createVariable('apcp_anal','f4',('time','yf','xf',),
        zlib=True,least_significant_digit=4)
    apcp_anal.units = "mm"
    apcp_anal.long_name = \
        "Interpolated 6-h accumulated combined CCPA/MSWEP analysis on CONUS NDFD grid"
    apcp_anal.valid_range = [0.,1000.]
    apcp_anal.missing_value = np.array(-9999.99,dtype=np.float32)

    # ---- initialize the coordinate and static fields

    xvf[:] = np.arange(nlons_ndfd)
    yvf[:] = np.arange(nlats_ndfd)
    lonsa[:] = lons_ndfd[:,:]
    latsa[:] = lats_ndfd[:,:]
    conusmask[:] = finalmask[:,:]

    # ---- metadata

    ncout.title = "NDFD CONUS domain interpolated from CCPA, 6 hourly accum."
    ncout.history = "Interpolated CCPA provided by Yan Luo, NCEP/EMC, Dec 2020"
    ncout.institution =  "NCEP/EMC"
    ncout.platform = "Precipitation analysis"
    ncout.references = "DOI: 10.1175/JHM-D-11-0140.1"

    # =========================================================================
    # ---- loop thru all dates, read the analyses, and munge them into netCDF...
    # =========================================================================

    ktr = 0   # index of the next record along the time dimension
    for iday in range(1,ndays+1):
        
        cday = str(iday).zfill(2)
        cyyyymmdd = cyear + cmonth + cday
        for chour in ['00','06','12','18',]:
            
            # chour marks the end of the accumulation period; the beginning
            # is obtained by shifting the end date by -6 via dateshift.
            cyyyymmddhh_end = cyear + cmonth + cday + chour
            cyyyymmddhh_begin = dateshift(cyyyymmddhh_end, -6)
            iyyyymmddhh = int(cyyyymmddhh_end)

            # ---- read the MSWEP data.  Produced by mswep_to_netcdf.py
            #      data from gloh2o.org
                
            if iday == 1 and chour == '00':
                
                # the 18-00 UTC data on the first day of month should be stored with 
                # the data from the previous month.   For example, if
                # iyyyymmddhh == 2002010100, the data period is 2001123118 to
                # 2002010100 and it'd be stored in the Dec 2001 netCDF file
                
                if cmonth == '01':
                    cyear_before = str(int(cyear)-1)
                    infile2 = mswep_directory + cyear_before + \
                        cmonth_before + '_on_ndfd_grid_6hourly.nc'
                else:
                    infile2 = mswep_directory + cyear + \
                        cmonth_before + '_on_ndfd_grid_6hourly.nc'
            else:
                infile2 = mswep_directory + cyear + cmonth + '_on_ndfd_grid_6hourly.nc'
            print (infile2)

            # The context manager closes the file even if the lookup fails.
            with Dataset(infile2) as nc:
                yyyymmddhh_end_in = nc.variables['yyyymmddhh_end'][:]
                matches = np.where(yyyymmddhh_end_in == iyyyymmddhh)[0]
                if matches.size == 0:
                    # fail with a message naming the missing record rather
                    # than an opaque array-to-int conversion error
                    raise ValueError('no MSWEP record ending at ' + \
                        cyyyymmddhh_end + ' found in ' + infile2)
                idx = int(matches[0])
                precip_mswep = nc.variables['apcp_anal'][idx,:,:].astype('d')
            if flipud_mswep == True:
                precip_mswep = np.flipud(precip_mswep)
            
            # ---- read the 6-hourly CCPA file. Use mask to flag missing data as 
            #      -99.99 value.   These were downloaded from Yan Luo's repository
            #      at NCEP, ftp.emc.ncep.noaa.gov, cd gc_wmb/yluo
            
            infile1 = '/Volumes/NBM/ccpa/ccpa.'+cyyyymmdd+\
                '/'+chour+'/ccpa.t'+chour+'z.06h.ndgd2p5.conus.gb2'
            print (infile1)
            try:    
                grb = pygrib.open(infile1)
                try:
                    precip_ccpa = grb.select()[0].values
                finally:
                    grb.close()
                if flipud == True:
                    precip_ccpa = np.flipud(precip_ccpa)
                
                # ma.filled replaces masked points by -99.99 and passes a
                # plain ndarray (a field with nothing masked) through
                # unchanged, so a fully valid CCPA field is not discarded.
                precip_ccpa_data = np.asarray(
                    ma.filled(precip_ccpa, -99.99), dtype=np.float64)
                
            except Exception as err:
                
                # ---- if this date is missing, fill the whole date with MSWEP data.
                
                print ('   CCPA not usable for ', cyyyymmddhh_end, \
                    ' (', err, '); using MSWEP everywhere')
                precip_ccpa_data = precip_mswep
               
            
            # =================================================================
            # ---- merge the ccpa and mswep data ------
            # =================================================================
    
            # ---- for land points where CCPA is available, use CCPA else flag as missing
            
            precip_final = np.where(np.logical_and(precip_ccpa_data >= 0., \
                finalmask > 0.0), precip_ccpa_data, mninenine)
                
            # ---- where CCPA flagged as missing and a land point, replace with MSWEP
            
            precip_final = np.where(np.logical_and(precip_ccpa_data < 0., \
                finalmask > 0.0), precip_mswep, precip_final)
                
            # ---- for non-land points, replace with MSWEP
            
            precip_final = np.where(finalmask == 0, precip_mswep, precip_final)
            
            # =================================================================
            # ---- write record to file.
            # =================================================================
            
            timev[ktr] = ktr
            yyyymmddhh_begin[ktr] = int(cyyyymmddhh_begin)
            yyyymmddhh_end[ktr] = int(cyyyymmddhh_end)
            apcp_anal[ktr] = precip_final[:,:]
            ktr = ktr + 1

    ncout.close()
    print ('writing to ', outfile, ' completed.')






                                                                                                                                                                         ._ccpa_to_netcdf_sum.py                                                                             000775  000765  000024  00000000416 14016263145 015376  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    S    192:834829                                                                                                                                                                                                                                                  ccpa_to_netcdf_sum.py                                                                               000775  000765  000024  00000015375 14016263145 015173  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """

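python ccpa_to_netcdf_sum.py cmonth

For chosen month (01 to 12), read grib files of CCPA on the CONUS 
NDFD grid, accumulate the 6-hourly precipitation and the count of 
valid samples at each grid point, and save them to a new netCDF file.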

Tom Hamill, Dec 2020

"""

import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
from netCDF4 import Dataset
import scipy.stats as stats
import pygrib
from netCDF4 import Dataset
from dateutils import hrs_since_day1CE_todate, \
    dateto_hrs_since_day1CE, hrstodate, datetohrs, dateshift
from mpl_toolkits.basemap import Basemap, interp

# ---- get the month from the command line

cmonth = sys.argv[1] # 01 etc
imonth = int(cmonth) - 1  # zero-based index into the days-per-month tables

# Days per month for ordinary and leap years.  February must differ between
# the two tables (28 vs 29); with identical tables Feb 29 is silently
# left out of the sums whenever a leap year is processed.
daysomo = [31, 28, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
daysomo_leap = [31, 29, 31,  30, 31, 30,  31, 31, 30,  31, 30, 31]
day1900 = datetime(1900,1,1,0)
hours1900 = dateto_hrs_since_day1CE(day1900, mixedcal=True)


# ---- Read the latitude/longitude arrays of the output NDFD CONUS grid
#      from a sample CCPA GRIB2 file.  Rows are reversed, if necessary,
#      so that the arrays run south to north.

infile = '/Volumes/Backup Plus/ccpa/ccpa.20180101/00/ccpa.t00z.06h.ndgd2p5.conus.gb2'
print (infile)
flatlon = pygrib.open(infile)
fcst = flatlon.select()[0]   # first GRIB message in the file
lats_ndfd, lons_ndfd = fcst.latlons()

# flipud records whether the rows were reversed; it is a module-level
# flag that later code consults when reading precipitation fields.
flipud = bool(lats_ndfd[0,0] > lats_ndfd[-1,0])
if flipud:
    lats_ndfd, lons_ndfd = np.flipud(lats_ndfd), np.flipud(lons_ndfd)
nlats_ndfd, nlons_ndfd = np.shape(lons_ndfd)
flatlon.close()
print ('min, max lons_ndfd = ', np.min(lons_ndfd), np.max(lons_ndfd))

# ---- Read the CONUS mask.  (Original author's caveat: accuracy of this
#      mask is uncertain.)

infile = '/Volumes/Backup Plus/ccpa/supplemental_locations_ndfd2p5_Jan.nc'
nc = Dataset(infile)
conusmask_in = nc.variables['conusmask'][:,:]
nc.close()

# ---- process all years for this month.  For each year one netCDF file is
#      written with the monthly precipitation sum and the number of valid
#      6-hourly samples at every grid point.

for iyear in range(2002,2020):
    cyear = str(iyear)
    
    # constant helper grids and the per-year accumulators (reset each year)
    zeros_float = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64)
    zeros_int = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.int32)
    ones = np.ones((nlats_ndfd, nlons_ndfd), dtype=np.int32)
    ktrarr = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.int32)
    apcp_sum_working = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64)
    mninetynine = -99.99*np.ones((nlats_ndfd, nlons_ndfd), dtype=np.float64)
    
    print ('****** processing year = ', iyear)

    # ---- determine the days of the month.  The divisible-by-4 test is a
    #      sufficient leap-year check for the 2002-2019 range used here.
    if iyear%4 == 0:
        ndays = daysomo_leap[imonth]
    else:
        ndays = daysomo[imonth]
    

    # ---- open netCDF output file and define its dimensions and variables.
     
    outfile = '../ccpa/'+cyear+cmonth+'_ccpa_sum_on_ndfd_grid_6hourly.nc'
    print ('   writing to ',outfile)
    ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC')

    xf = ncout.createDimension('xf',nlons_ndfd)
    xvf = ncout.createVariable('xf','f4',('xf',))
    xvf.long_name = "eastward grid point number on NDFD grid"
    xvf.units = "n/a"

    yf = ncout.createDimension('yf',nlats_ndfd)
    yvf = ncout.createVariable('yf','f4',('yf',))
    yvf.long_name = "northward grid point number on NDFD grid"
    yvf.units = "n/a"

    lonsa = ncout.createVariable('lons','f4',('yf','xf',))
    lonsa.long_name = "longitude"
    lonsa.units = "degrees_east"

    latsa = ncout.createVariable('lats','f4',('yf','xf',))
    latsa.long_name = "latitude"
    latsa.units = "degrees_north"
    
    # The mask attributes must be set on conusmask itself; setting them
    # on latsa would overwrite the latitude metadata assigned just above.
    conusmask = ncout.createVariable('conusmask','i4',('yf','xf',))
    conusmask.long_name = "mask (1=land, 0=water)"
    conusmask.units = "none"

    # --- declare the single-level variable information on lat-lon grid

    apcp_sum = ncout.createVariable('apcp_sum','f8',('yf','xf',),
        zlib=True,least_significant_digit=4)
    apcp_sum.units = "mm"
    apcp_sum.long_name = \
        "Sum of 6-h precipitation on CONUS NDFD grid"
    apcp_sum.valid_range = [0.,1000.]
    apcp_sum.missing_value = np.array(-99.99,dtype=np.float32)
    
    apcp_count = ncout.createVariable('apcp_count','i4',('yf','xf',),
        zlib=True,least_significant_digit=4)
    apcp_count.units = "n/a"
    apcp_count.long_name = \
        "count of the number of valid samples"
    apcp_count.valid_range = [0,100000]
    apcp_count.missing_value = np.array(-99,dtype=np.int32)
    

    # ---- initialize the coordinate and static fields

    xvf[:] = np.arange(nlons_ndfd)
    yvf[:] = np.arange(nlats_ndfd)
    lonsa[:] = lons_ndfd[:,:]
    latsa[:] = lats_ndfd[:,:]
    conusmask[:] = conusmask_in[:,:]

    # ---- metadata

    ncout.title = "NDFD CONUS domain interpolated from CCPA, 6 hourly accum."
    ncout.history = "Interpolated CCPA provided by Yan Luo, NCEP/EMC, Dec 2020"
    ncout.institution =  "NCEP/EMC"
    ncout.platform = "Precipitation analysis"
    ncout.references = "DOI: 10.1175/JHM-D-11-0140.1"

    # ---- loop thru all dates, read the analyses, and accumulate them

    for iday in range(1,ndays+1):
        
        cday = str(iday).zfill(2)
        cyyyymmdd = cyear + cmonth + cday
        for chour in ['00','06','12','18',]:

            # chour marks the end of the accumulation period; the beginning
            # is obtained by shifting the end date by -6 via dateshift.
            cyyyymmddhh_end = cyear + cmonth + cday + chour
            print (cyyyymmddhh_end)
            cyyyymmddhh_begin = dateshift(cyyyymmddhh_end, -6)
            print (cyyyymmddhh_begin, cyyyymmddhh_end)
        
            # --- read the 6-hourly CCPA file. Make sure none subzero.
            
            infile = '/Volumes/Backup Plus/ccpa/ccpa.'+cyyyymmdd+\
                '/'+chour+'/ccpa.t'+chour+'z.06h.ndgd2p5.conus.gb2'
            print (infile)
            try:
                grb = pygrib.open(infile)
                panal = grb.select()[0]
                precip_ccpa = panal.values
                grb.close()
                
                precip_ccpa = np.where(precip_ccpa < 0.0, zeros_float, precip_ccpa)
                if flipud == True:
                    precip_ccpa = np.flipud(precip_ccpa)

                # values above 999 are treated as invalid: they add nothing
                # to the sum and are not counted as a sample
                bine = np.where(precip_ccpa > 999., zeros_int, ones)   
                precip_ccpa = np.where(precip_ccpa > 999, zeros_float, precip_ccpa) 
                apcp_sum_working = apcp_sum_working + precip_ccpa
                ktrarr = ktrarr + bine
                
            except Exception as err:
                # Best effort: skip dates that cannot be read, but report
                # why and let Ctrl-C / SystemExit propagate.
                print ('whoops!   some problem with ', infile, ':', err)
        
    # grid points that never received a valid sample get the -99.99 flag
    apcp_sum_working = np.where(ktrarr > 0, apcp_sum_working, mninetynine)            
    apcp_sum[:] = apcp_sum_working[:,:]
    apcp_count[:] = ktrarr[:,:]

    ncout.close()






                                                                                                                                                                                                                                                                   ._censored_likelihood.py                                                                            000775  000765  000024  00000000743 14016263145 015567  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                             ATTR        $                    $     com.apple.lastuseddate#PS      4     %com.apple.metadata:kMDItemWhereFroms        com.macromates.selectionRange           com.macromates.visibleIndex  `    ٰS    bplist00_uimap://tom%2Ehamill@email.boulder.noaa.gov:993/fetch%3EUID%3E/INBOX%3E135651?part=1.2&filename=censored_likelihood.pyP                            25:91015                             censored_likelihood.py                                                                              000775  000765  000024  00000005410 14016263145 015346  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         import numpy as np
import scipy as sp
import math
import os, sys
import matplotlib.pyplot as plt

from numpy import ma
from numpy.random import random_sample
from scipy import stats
from scipy.stats import gamma
from scipy.special import loggamma
from scipy.interpolate import interp1d
from scipy.optimize import minimize


####  Draw a synthetic sample from a mixture of two gamma distributions  ###
#
# mixture weights, shape and scale parameters of the two components
w1, w2 = 0.4, 0.6
shape1, shape2 = 0.2, 0.6
scale1, scale2 = 3.5, 2.2

n = 1000   # sample size

# cmp[i] is 1 with probability w2 (sample i comes from component 2)
# and 0 otherwise (component 1)
cmp = np.random.binomial(1, w2, size=n)
from_comp1 = (cmp == 0)
from_comp2 = (cmp == 1)
n1 = np.sum(from_comp1)
n2 = np.sum(from_comp2)

# fill each sample from the gamma distribution of its component
x = np.zeros(n, dtype=np.float32)
x[from_comp1] = np.random.gamma(shape1, scale1, n1)
x[from_comp2] = np.random.gamma(shape2, scale2, n2)

#
##########################################################



## Fit a single gamma distribution to the full sample.  The shape is
## obtained in closed form from D = ln(mean(x)) - mean(ln(x)), and the
## scale as mean/shape.

pmean = np.mean(x)
lnxbar = np.log(pmean)
meanlnxi = np.mean(np.log(x))
D = lnxbar - meanlnxi
alpha0 = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
invbeta0 = pmean / alpha0


## Same closed-form fit, but applied only to the upper 20% of the data
## (values above the 80th percentile c) as if they were a complete sample,
## i.e. ignoring the censoring issue.

c = np.percentile(x,80)
x_above_c = x[x>c]

pmeanC1 = np.mean(x_above_c)
lnxbarC1 = np.log(pmeanC1)
meanlnxiC1 = np.mean(np.log(x_above_c))
D = lnxbarC1 - meanlnxiC1
alphaC1 = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
invbetaC1 = pmeanC1 / alphaC1



## Fit single component only to upper 20% data using censored maximum likelihood

def loglik(par,x_ucs,c,n_usc,n_cs):     # censored, negative log-likelihood function
    """Return the negative log-likelihood of a gamma sample censored below c.

    par   -- sequence (shape, scale) of the gamma distribution
    x_ucs -- array of the uncensored observations (the values above c)
    c     -- censoring threshold
    n_usc -- number of uncensored observations
    n_cs  -- number of censored observations (known only to be <= c)

    Each censored observation contributes -log F(c); each uncensored one
    contributes minus the log of the gamma density at its value.
    """
    shape, scale = par[0], par[1]
    # Censored part: all n_cs values share the same probability mass F(c).
    T1 = -n_cs * np.log(gamma.cdf(c, shape, scale=scale))
    # Uncensored part.  Uses the n_usc argument; the original body read a
    # differently spelled module-level global (n_ucs) instead.
    T2 = np.sum(x_ucs)/scale - (shape-1)*np.sum(np.log(x_ucs)) \
        + n_usc*(loggamma(shape) + shape*np.log(scale))
    return T1 + T2

# Box constraints for (shape, scale) and the starting point of the search:
# start with the parameters fitted to the full data set.
bnds = ((0.001, 1.5), (0.001, 3.0*invbeta0))
pst = [alpha0, invbeta0]

# Split the sample at the threshold c: values above it are kept as they are,
# the remainder only enter the likelihood through their count.
x_ucs = x[x > c]
n_ucs = x_ucs.size
n_cs = x.size - n_ucs

fit_result = minimize(loglik, pst, args=(x_ucs, c, n_ucs, n_cs),
                      method='L-BFGS-B', bounds=bnds, tol=1e-6)
par_opt = fit_result.x

alphaC2, invbetaC2 = par_opt

##  Make some plots

prob = np.arange(1, n+1)/(n+1)      # plotting positions i/(n+1)
xgrd = np.arange(0, 15, 0.1)
x_sorted = np.sort(x)

# (shape, scale, colour, legend label) for each of the three fitted models.
fitted = (
    (alpha0, invbeta0, 'm', 'Single Gamma'),
    (alphaC1, invbetaC1, 'r', 'Single to upper 20%, ignore censor'),
    (alphaC2, invbetaC2, 'b', 'Single to upper 20%, censor'),
)

# Plot empirical and fitted CDFs
plt.figure()
plt.scatter(x_sorted, prob)
for fit_shape, fit_scale, colour, caption in fitted:
    plt.plot(xgrd, gamma.cdf(xgrd, fit_shape, scale=fit_scale), c=colour, label=caption)
plt.legend(loc=0)
plt.show()


# Q-Q Plots
plt.figure()
plt.plot(xgrd, xgrd, c='k')         # 1:1 reference line
for fit_shape, fit_scale, colour, caption in fitted:
    plt.scatter(gamma.ppf(prob, fit_shape, scale=fit_scale), x_sorted, c=colour, label=caption)
plt.legend(loc=0)
plt.show()




                                                                                                                                                                                                                                                        ._chaos.py                                                                                          000775  000765  000024  00000000260 14016263145 012651  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    T                                                                                                                                                                                                                                                                                                                                                    chaos.py                                                                                            000775  000765  000024  00000000410 14016263145 012431  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # File: chaos.py
# A simple program illustrating chaotic behavior.

def main():
    """Print ten iterates of the logistic map x -> 3.9 * x * (1 - x).

    The starting value is read from standard input.
    """
    print("This program illustrates a chaotic function")
    # float() is needed on Python 3, where input() returns the typed text;
    # on Python 2 it is a harmless no-op on the already-numeric value.
    x = float(input("Enter a number between 0 and 1: "))
    for _ in range(10):
        x = 3.9 * x * (1 - x)
        print(x)

main()
                                                                                                                                                                                                                                                        ._chaos2.py                                                                                         000775  000765  000024  00000000260 14016263145 012733  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    NU                                                                                                                                                                                                                                                                                                                                                    chaos2.py                                                                                           000775  000765  000024  00000000510 14016263145 012514  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # File: chaos2.py
# A simple program illustrating chaotic behavior.

def main():
    """Iterate x -> 3.9 * x * (1 - x) from two starting values side by side.

    Reads two starting values and an iteration count from standard input,
    then prints one row per iteration holding both sequences.

    The original loop updated an unassigned name (xcombo) and never used the
    two values it had read, so it failed on the first iteration; each value
    is now advanced independently and the pair printed as two columns.
    """
    print("This program illustrates a chaotic function")
    reply = input("Enter 2 numbers between 0 and 1: ")
    if isinstance(reply, str):
        # Python 3 returns the raw text, e.g. "0.25, 0.5" or "0.25 0.5".
        reply = reply.replace(",", " ").split()
    x1, x2 = (float(value) for value in reply)
    y = int(input("How many iterations? "))
    for _ in range(y):
        x1 = 3.9 * x1 * (1 - x1)
        x2 = 3.9 * x2 * (1 - x2)
        print("%f    %f" % (x1, x2))

main()
                                                                                                                                                                                        ._compare_errors_decay_conuskf.py                                                                   000775  000765  000024  00000000412 14016263145 017472  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    |
V    25:320                                                                                                                                                                                                                                                      compare_errors_decay_conuskf.py                                                                     000775  000765  000024  00000010026 14016263145 017257  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
compare_errors_decay_conuskf.py

"""
import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

rcParams['xtick.labelsize']='medium'
rcParams['ytick.labelsize']='medium'
rcParams['legend.fontsize']='large'

datadir = '/Users/Tom/python/ecmwf/'
cvariable = '2t'

# One slot per forecast lead (24, 48, 72, 96 h): RMSE averaged over all dates.
rmse_kf_mean = np.zeros(4,dtype=np.float32)
rmse_decayavg_mean = np.zeros(4,dtype=np.float32)
rmse_raw_mean = np.zeros(4,dtype=np.float32)
# Alpha string used in the decaying-average file name, one entry per lead.
calpha_optimal = ['0.10', '0.06', '0.04', '0.04']

for ilead in range(24,97,24):
    clead = str(ilead)
    # presumably dateshift offsets a YYYYMMDDHH string by a number of hours
    # and daterange steps between two dates -- TODO confirm against dateutils
    datestart = dateshift('2019010100',ilead)
    dateend = dateshift('2019123100',-ilead)
    date_list_initial = daterange(datestart,dateend,24)
    print (date_list_initial)
    #date_list_anal = daterange(datestart,'2019013100',24)
    ndates = len(date_list_initial)
    rmse_kf = np.zeros((ndates), dtype=np.float32)
    rmse_decayavg = np.zeros((ndates), dtype=np.float32)
    rmse_raw = np.zeros((ndates), dtype=np.float32)

    ilead_idx = ilead//24 -1
    calpha = calpha_optimal[ilead_idx]
    # valid time of each forecast = its initial time shifted by the lead
    date_list_valid = [dateshift(datea, ilead) for datea in date_list_initial]

    for idate, (datea, datev) in enumerate(zip(date_list_initial, date_list_valid)):

        # ---- read the ECMWF ERA5 reanalysis at this analysis date.
        #      NOTE: pickle files are only safe to load from a trusted source.

        infile = datadir + 't2m_era5_halfdegree_'+datev+'.cPick'
        print (infile)
        with open(infile, 'rb') as inf:
            analysis = cPickle.load(inf)
            if idate == 0:
                # the grid is read once per lead, from the first file only
                lats = cPickle.load(inf)
                lons = cPickle.load(inf)
                nlats, nlons = np.shape(lats)
                npts = nlats*nlons

        # ---- read the ECMWF control forecast at this lead time and initial date

        infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'
        grbfile = pygrib.open(infile)
        try:
            grb = grbfile.select()[0]
            forecast = grb.values
        finally:
            # release the GRIB handle even when the read fails
            grbfile.close()

        # ---- read the decaying average and Kalman filter bias corrections estimates
        #      for this date.

        infilename = datadir + 'bias_decayavg_alpha'+calpha+'_'+datea+'_f'+clead+'.cPick'
        with open(infilename, 'rb') as inf:
            bias_decayavg = cPickle.load(inf)

        infilename = datadir + 'bias_est_conusKF'+datea+'_f'+clead+'.cPick'
        with open(infilename, 'rb') as inf:
            bias_estimate = cPickle.load(inf)

        #frac2019[idate] = fracyear = doy/365.

        # RMSE against the analysis: raw forecast, and forecast minus each
        # bias estimate; the squared-error sum is divided by npts-1.
        rmse_raw[idate] = np.sqrt(np.sum((forecast-analysis)**2)/(npts-1.))
        rmse_decayavg[idate] = np.sqrt(np.sum(((forecast-bias_decayavg)-analysis)**2)/(npts-1.))
        rmse_kf[idate] = np.sqrt(np.sum(((forecast-bias_estimate)-analysis)**2)/(npts-1.))

    rmse_raw_mean[ilead_idx] = np.mean(rmse_raw)
    rmse_decayavg_mean[ilead_idx]  = np.mean(rmse_decayavg)
    rmse_kf_mean[ilead_idx]  = np.mean(rmse_kf)

# ---- plot errors: mean RMSE versus forecast lead, one line per method

f = plt.figure(figsize=(6.5,4))
ax = f.add_axes([.13,.13,.83,.79])
ax.set_title(r'RMSE of 2019 ECMWF T$_{2m}$ forecasts over CONUS', fontsize=14)

lead_days = [1,2,3,4]
# (values, line colour, legend label, marker face colour), in drawing order
curves = (
    (rmse_raw_mean, 'Black', 'Raw', 'Red'),
    (rmse_decayavg_mean, 'RoyalBlue', 'Decaying average', 'RoyalBlue'),
    (rmse_kf_mean, 'Red', 'CONUS Kalman filter', 'Red'),
)
for values, line_colour, legend_text, face_colour in curves:
    ax.plot(lead_days, values, 'o-', color=line_colour,
        lw=1.5, markersize=0.6, label=legend_text, markerfacecolor=face_colour)

ax.set_xlim(0,5)
ax.set_xticks([0,1,2,3,4,5])
ax.set_xlabel('Forecast lead (days)', fontsize=13)
ax.set_ylim(0,2.5)
ax.set_ylabel('RMSE (deg C)',fontsize=13)
ax.legend(loc=0)
ax.grid(True, lw=0.25)

imagefile = 'rmse.pdf'
f.savefig(imagefile)
print ('Plot done', imagefile)
    
            

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          ._compare_skint.py                                                                                  000775  000765  000024  00000000415 14016263145 014414  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    V    84:511965                                                                                                                                                                                                                                                   compare_skint.py                                                                                    000775  000765  000024  00000010560 14016263145 014201  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dayofyear
import sys
import pygrib
import os
import os.path
from os import path
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

rcParams['xtick.labelsize']='x-small'
rcParams['ytick.labelsize']='x-small'

date_list = daterange('2019090100', '2019123100', 24)
ndates = len(date_list)

# ---- loop over dates.
#
# The storage arrays are created from the first files that can actually be
# read, not from the first date in the list: previously a missing file on
# the first date left the arrays (and lats/lons) undefined and the script
# stopped with a NameError.  Dates without an NSST file are remembered and
# masked once the arrays exist.

nsst_save = None
reanal_save = None
missing_dates = []

for idate, date in enumerate(date_list):

    yyyymmdd = date[0:8]
    infile = 'nsst/'+yyyymmdd+'_gdas.t00z.pgrb2.0p25.f000'
    print (infile)
    if not path.exists(infile):
        print ('unable to read ', infile)
        missing_dates.append(idate)
        continue

    # ---- read in the skin temperatures from the operational

    sstfile = pygrib.open(infile)
    grb = sstfile.select(shortName = 't')[0]
    sst = grb.values
    if nsst_save is None:
        lats, lons = grb.latlons()
        nlats, nlons = np.shape(lats)
        nsst_save = ma.zeros((ndates,nlats,nlons), dtype=np.float32)
    sstfile.close()
    nsst_save[idate,:,:] = sst[:,:]

    # --- read the associated reanalysis skin temperature
    #     (as before, this file is opened without an existence check)

    infile ='/Users/Tom/python/gefsv12/2015/bfg_'+\
        yyyymmdd+'00_fhr00_control2.nc4'
    print (infile)
    nc = Dataset(infile)
    tmpsfc_in = nc.variables['tmpsfc'][0,:,:]
    #tmpsfc_in = nc.variables['tmp2m'][0,:,:]
    if reanal_save is None:
        lon_reanal = nc.variables['lon'][:]
        lat_reanal = nc.variables['lat'][:]
        print ('lat_reanal = ',lat_reanal)
        landsfc = nc.variables['landsfc'][0,:,:]
        # sized from the data itself (the grid was hard-coded as 256 x 512)
        reanal_save = ma.zeros((ndates,)+tmpsfc_in.shape, dtype=np.float32)
    nc.close()
    reanal_save[idate,:,:] = tmpsfc_in[:,:]

if nsst_save is None:
    raise RuntimeError('no NSST files could be read for any date in date_list')

# ---- mask the dates that had no NSST file so they drop out of the means.

for idate in missing_dates:
    nsst_save[idate,:,:] = ma.masked
    reanal_save[idate,:,:] = ma.masked
    
        
# ---- time means over the date axis, flipped upside down so that the
#      latitudes are oriented S to N, increasing (the grid coordinate
#      arrays are flipped the same way).

nsst_mean = np.flipud(ma.mean(nsst_save, axis=0))
reanal_mean = np.flipud(ma.mean(reanal_save, axis=0))

lons = np.flipud(lons)
lats = np.flipud(lats)
lat_reanal = np.flipud(lat_reanal)

print ('reanal_mean max, min = ', ma.max(reanal_mean), ma.min(reanal_mean))
print ('nsst_mean max, min = ', ma.max(nsst_mean), ma.min(nsst_mean))

# ---- interpolate the reanalysis data to the NSST grid.

reanal_mean_NSSTgrid = interp(
    reanal_mean, lon_reanal, lat_reanal, lons, lats,
    checkbounds=False, masked=False, order=1) # - 273.15
print (
    'max, min reanal_mean_NSSTgrid = ',
    ma.max(reanal_mean_NSSTgrid),
    ma.min(reanal_mean_NSSTgrid))

# ---- difference to plot: NSST mean minus reanalysis mean on the NSST grid

sst_difference = nsst_mean - reanal_mean_NSSTgrid

# ---- code to use for plotting
#
# Draws sst_difference as filled contours on a Basemap with projection
# 'mill', adds a horizontal colorbar under the map and saves the figure
# as a PNG.

fig = plt.figure(figsize=(9.,5.6))
axloc = [0.07,0.11,0.9,0.82]
ax = fig.add_axes(axloc)
ax.set_title('00 UTC global skin temperature differences, operational minus reanalysis, 1 Sep 2019 to 31 Dec 2019')
# Fill colours running from blue through white to red.
# NOTE(review): 9 colours are given for the 9 intervals between the 10
# levels below, while extend='both' adds two out-of-range regions -- confirm
# which colours those regions end up with.
colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff', \
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
#clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
clevs = [-6,-4,-2,-1,-0.5,0.5,1,2,4,6]
colorstblack='Black'  # not used in the rest of this plotting section
parallels = np.arange(-80.,90,20.)
meridians = np.arange(0.,360.,20.)
# Longitude extent is taken from the corners of the lons array; latitude is
# limited to -75..75.
m = Basemap(llcrnrlon=lons[0,0],llcrnrlat=-75,\
    urcrnrlon=lons[-1,-1],urcrnrlat=75.,\
    projection='mill',resolution='l')
x, y = m(lons, lats)  # grid coordinates converted to map coordinates

CS2 = m.contourf(x,y,sst_difference,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.5,color='Gray')
m.drawcountries(linewidth=0.3,color='Gray')
# NOTE(review): labels has three entries here but four for the meridians
# below -- confirm that the three-entry form is intended.
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)

# ---- use axes_grid toolkit to make colorbar axes.

#ax = fig.add_axes([0.,0.,1.,1.])
divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="3%", pad=0.35)
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.set_label('Difference (deg C)')

# ---- save the figure (plot_title holds the output file name)

plot_title = 'skint_difference.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')

                                                                                                                                                ._compare_skint_parallel.py                                                                         000775  000765  000024  00000000411 14016263145 016264  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    МW    40:50                                                                                                                                                                                                                                                       compare_skint_parallel.py                                                                           000775  000765  000024  00000024625 14016263145 016064  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dayofyear
import sys
import pygrib
import os
import os.path
from os import path
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

rcParams['xtick.labelsize']='x-small'
rcParams['ytick.labelsize']='x-small'

date_list = daterange('2017120100', '2019113000', 24)
date_list_winter = daterange('2017120100', '2018043000', 24) + \
     daterange('2018100100', '2019043000', 24) + \
     daterange('2019100100', '2019113000', 24)
date_list_summer = daterange('2018050100', '2018093000', 24) + \
     daterange('2019050100', '2019093000', 24)
ndates = len(date_list)
ndates_summer = len(date_list_summer)
ndates_winter = len(date_list_winter)
print ('ndates, ndates_summer, ndates_winter = ',ndates, ndates_summer, ndates_winter )
#sys.exit()

# Sets give constant-time season membership tests inside the loop
# (the lists were previously scanned with .count() for every date).
summer_dates = set(date_list_summer)
winter_dates = set(date_list_winter)

# ---- loop over dates.
#
# The storage arrays are created from the first date for which both files
# can be read, not from the first date in the list: previously a missing
# file on the first date left every array undefined and the script stopped
# with a NameError.  Dates with a missing file are remembered, together
# with their seasonal slots, and masked once the arrays exist.

nsst_save = None
missing = []       # (idate, summer slot or None, winter slot or None)
ktrsum = 0
ktrwin = 0
for idate, date in enumerate(date_list):

    yyyymmdd = date[0:8]

    # NOTE(review): the reanalysis file name carries '06' while the plot
    # titles later in this script say 00 UTC -- confirm the intended cycle.
    infile1 = '/Volumes/Backup Plus/gefsv12/skint/'+date+'_skint.grib2'
    infile2 = '/Volumes/Backup Plus/bfg/2015/bfg_'+yyyymmdd+'06_fhr00_control2.nc4'
    print (infile1, infile2)
    fexist1 = path.exists(infile1)
    fexist2 = path.exists(infile2)

    # Reserve this date's slot in the seasonal arrays whether or not its
    # files exist, so the slots stay aligned with the seasonal date lists.
    isum = None
    iwin = None
    if date in summer_dates:
        isum = ktrsum
        ktrsum = ktrsum + 1
    if date in winter_dates:
        iwin = ktrwin
        ktrwin = ktrwin + 1

    if not (fexist1 and fexist2):
        if not fexist1:
            print ('unable to read ', infile1)
        if not fexist2:
            print ('unable to read ', infile2)
        missing.append((idate, isum, iwin))
        continue

    # ---- read in the skin temperatures from the pre-operational parallel

    sstfile = pygrib.open(infile1)
    grb = sstfile.select(shortName = 't')[0]
    sst = grb.values
    if nsst_save is None:
        lats, lons = grb.latlons()
    sstfile.close()

    # --- read the associated reanalysis skin temperature

    nc = Dataset(infile2)
    tmpsfc_in = nc.variables['tmpsfc'][0,:,:]
    #tmpsfc_in = nc.variables['tmp2m'][0,:,:]
    if nsst_save is None:
        lon_reanal = nc.variables['lon'][:]
        lat_reanal = nc.variables['lat'][:]
        print ('lat_reanal = ',lat_reanal)
        landsfc = nc.variables['landsfc'][0,:,:] # land/water mask
    nc.close()

    if nsst_save is None:
        # First readable date: size every array from the data itself
        # (the reanalysis grid was hard-coded as 256 x 512).
        nlats, nlons = np.shape(lats)
        reanal_shape = tmpsfc_in.shape
        nsst_save = ma.zeros((ndates,nlats,nlons), dtype=np.float32)
        reanal_save = ma.zeros((ndates,)+reanal_shape, dtype=np.float32)
        nsst_save_summer = ma.zeros((ndates_summer,nlats,nlons), dtype=np.float32)
        reanal_save_summer = ma.zeros((ndates_summer,)+reanal_shape, dtype=np.float32)
        nsst_save_winter = ma.zeros((ndates_winter,nlats,nlons), dtype=np.float32)
        reanal_save_winter = ma.zeros((ndates_winter,)+reanal_shape, dtype=np.float32)

    nsst_save[idate,:,:] = sst[:,:]
    reanal_save[idate,:,:] = tmpsfc_in[:,:]
    if isum is not None:
        nsst_save_summer[isum,:,:] = sst[:,:]
        reanal_save_summer[isum,:,:] = tmpsfc_in[:,:]
    if iwin is not None:
        nsst_save_winter[iwin,:,:] = sst[:,:]
        reanal_save_winter[iwin,:,:] = tmpsfc_in[:,:]

if nsst_save is None:
    raise RuntimeError('no date in date_list had both input files available')

# ---- mask the dates with a missing file so they drop out of the means.

for idate, isum, iwin in missing:
    nsst_save[idate,:,:] = ma.masked
    reanal_save[idate,:,:] = ma.masked
    if isum is not None:
        nsst_save_summer[isum,:,:] = ma.masked
        reanal_save_summer[isum,:,:] = ma.masked
    if iwin is not None:
        nsst_save_winter[iwin,:,:] = ma.masked
        reanal_save_winter[iwin,:,:] = ma.masked

# ---- diagnostic print of the seasonal time series at one grid point

print ('nsst_save_summer[:,60,60]', nsst_save_summer[:,60,60])
print ('nsst_save_winter[:,60,60]', nsst_save_winter[:,60,60])
print ('reanal_save_summer[:,60,60]', reanal_save_summer[:,60,60])
print ('reanal_save_winter[:,60,60]', reanal_save_winter[:,60,60])
        
# ---- time means over the date axis (all dates, summer, winter), flipped
#      upside down so that the latitudes are oriented S to N, increasing.

nsst_mean = np.flipud(ma.mean(nsst_save, axis=0))
nsst_mean_summer = np.flipud(ma.mean(nsst_save_summer, axis=0))
nsst_mean_winter = np.flipud(ma.mean(nsst_save_winter, axis=0))

reanal_mean = np.flipud(ma.mean(reanal_save, axis=0))
reanal_mean_summer = np.flipud(ma.mean(reanal_save_summer, axis=0))
reanal_mean_winter = np.flipud(ma.mean(reanal_save_winter, axis=0))

# The grid coordinate arrays are flipped the same way as the fields.
lons = np.flipud(lons)
lats = np.flipud(lats)
lat_reanal = np.flipud(lat_reanal)

# ---- interpolate the reanalysis data to the NSST grid.

reanal_mean_NSSTgrid = interp(
    reanal_mean, lon_reanal, lat_reanal, lons, lats,
    checkbounds=False, masked=False, order=1) # - 273.15
reanal_mean_NSSTgrid_summer = interp(
    reanal_mean_summer, lon_reanal, lat_reanal, lons, lats,
    checkbounds=False, masked=False, order=1) # - 273.15
reanal_mean_NSSTgrid_winter = interp(
    reanal_mean_winter, lon_reanal, lat_reanal, lons, lats,
    checkbounds=False, masked=False, order=1) # - 273.15
print (
    'max, min reanal_mean_NSSTgrid = ',
    ma.max(reanal_mean_NSSTgrid),
    ma.min(reanal_mean_NSSTgrid))

# ---- differences to plot: parallel-run mean minus reanalysis mean

sst_difference = nsst_mean - reanal_mean_NSSTgrid
sst_difference_summer = nsst_mean_summer - reanal_mean_NSSTgrid_summer
sst_difference_winter = nsst_mean_winter - reanal_mean_NSSTgrid_winter

# ---- code to use for plotting
#
# Single-panel figure: sst_difference (all dates) as filled contours on a
# Basemap with projection 'mill', with a horizontal colorbar under the map,
# saved as a PNG.

fig = plt.figure(figsize=(9.,5.6))
axloc = [0.07,0.075,0.9,0.82]
ax = fig.add_axes(axloc)
ax.set_title('00 UTC global skin temperature differences,\npre-production parallel minus reanalysis, 1 Dec 2017 to 30 Nov 2019',fontsize=16)
# Fill colours running from blue through white to red.
# NOTE(review): 9 colours are given for the 9 intervals between the 10
# levels below, while extend='both' adds two out-of-range regions -- confirm
# which colours those regions end up with.
colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff', \
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
#clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
clevs = [-6,-4,-2,-1,-0.5,0.5,1,2,4,6]
colorstblack='Black'  # not used in the rest of this plotting section
parallels = np.arange(-80.,90,10.)
meridians = np.arange(0.,360.,20.)
# Longitude extent is taken from the corners of the lons array; latitude is
# limited to -75..75.
m = Basemap(llcrnrlon=lons[0,0],llcrnrlat=-75,\
    urcrnrlon=lons[-1,-1],urcrnrlat=75.,\
    projection='mill',resolution='l')
x, y = m(lons, lats)  # grid coordinates converted to map coordinates

CS2 = m.contourf(x,y,sst_difference,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.5,color='Gray')
m.drawcountries(linewidth=0.3,color='Gray')
# NOTE(review): labels has three entries here but four for the meridians
# below -- confirm that the three-entry form is intended.
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)

# ---- use axes_grid toolkit to make colorbar axes.

#ax = fig.add_axes([0.,0.,1.,1.])
divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="3%", pad=0.35)
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.set_label('Difference (deg C)')

# ---- save the figure (plot_title holds the output file name)

plot_title = 'skint_difference.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')


# ---- code to use for plotting
#
# Three-panel figure: (a) the all-date difference on the full map, and
# below it two regional maps (longitude 60-140, latitude 20-60) of
# (b) sst_difference_winter and (c) sst_difference_summer, sharing one
# colorbar along the bottom of the figure.

# ---- panel (a): all dates

fig = plt.figure(figsize=(9.,9.))
axloc = [0.07,0.435,0.9,0.52]
ax = fig.add_axes(axloc)
ax.set_title('(a) 00 UTC global skin temperature differences,\npre-production parallel minus reanalysis, 1 Dec 2017 to 30 Nov 2019',fontsize=16)
# NOTE(review): 9 colours are given for the 9 intervals between the 10
# levels below, while extend='both' adds two out-of-range regions -- confirm
# which colours those regions end up with.
colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff', \
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
#clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
clevs = [-6,-4,-2,-1,-0.5,0.5,1,2,4,6]
colorstblack='Black'  # not used in the rest of this plotting section
parallels = np.arange(-80.,90,10.)
meridians = np.arange(0.,360.,20.)
m = Basemap(llcrnrlon=lons[0,0],llcrnrlat=-75,\
    urcrnrlon=lons[-1,-1],urcrnrlat=75.,\
    projection='mill',resolution='l')
x, y = m(lons, lats)  # grid coordinates converted to map coordinates

CS2 = m.contourf(x,y,sst_difference,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.7,color='Gray')
m.drawcountries(linewidth=0.5,color='Gray')
# NOTE(review): labels has three entries for the parallels but four for
# the meridians -- confirm that the three-entry form is intended.
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)



# ---- panel (b): winter subset, regional map
# NOTE(review): date_list_winter, as built near the top of this script,
# starts its later seasons on 1 Oct, while this title says Nov-Apr --
# confirm which one is intended.

axloc = [0.07,0.095,0.42,0.32]
ax = fig.add_axes(axloc)
ax.set_title('(b) Nov-Apr differences',fontsize=16)
colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff', \
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
#clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
clevs = [-6,-4,-2,-1,-0.5,0.5,1,2,4,6]
colorstblack='Black'
parallels = np.arange(20.,60.,10.)
meridians = np.arange(60.,140.,20.)
m = Basemap(llcrnrlon=60.,llcrnrlat=20,\
    urcrnrlon=140.,urcrnrlat=60.,\
    projection='mill',resolution='l')
x, y = m(lons, lats)
print ('max, min sst_difference_winter ', np.max(sst_difference_winter), np.min(sst_difference_winter))
CS2 = m.contourf(x,y,sst_difference_winter,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.7,color='Gray')
m.drawcountries(linewidth=0.7,color='Gray')
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)


# ---- panel (c): summer subset, same regional map
# NOTE(review): date_list_summer, as built near the top of this script,
# ends on 30 Sep, while this title says May-Oct -- confirm which one is
# intended.

axloc = [0.55,0.095,0.42,0.32]
ax = fig.add_axes(axloc)
ax.set_title('(c) May-Oct differences',fontsize=16)
colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff', \
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
#clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
clevs = [-6,-4,-2,-1,-0.5,0.5,1,2,4,6]
colorstblack='Black'
parallels = np.arange(20.,60.,10.)
meridians = np.arange(60.,140.,20.)
m = Basemap(llcrnrlon=60.,llcrnrlat=20,\
    urcrnrlon=140.,urcrnrlat=60.,\
    projection='mill',resolution='l')
x, y = m(lons, lats)
print ('max, min sst_difference_summer ', np.max(sst_difference_summer), np.min(sst_difference_summer))
CS2 = m.contourf(x,y,sst_difference_summer,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.7,color='Gray')
m.drawcountries(linewidth=0.7,color='Gray')
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)


# ---- use axes_grid toolkit to make colorbar axes.
#      The colorbar is built from the last contour set (panel c); all
#      three panels use the same levels and colours.

axloc = [0.02, 0.06, 0.96, 0.02]
cax = fig.add_axes(axloc)
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.set_label('Difference (deg C)')

# ---- save the figure (plot_title holds the output file name)

plot_title = 'skint_difference_3panel.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')



                                                                                                           ._compare_sst.py                                                                                    000775  000765  000024  00000000416 14016263145 014076  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS         	  com.macromates.selectionRange      
     com.macromates.visibleIndex  `    DX    1-13:56+60                                                                                                                                                                                                                                                  compare_sst.py                                                                                      000775  000765  000024  00000015074 14016263145 013667  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         from netCDF4 import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dayofyear
import sys
import pygrib
import os
import os.path
from os import path
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

# ---- use small tick labels on both plot axes.

for tick_axis in ('xtick', 'ytick'):
    rcParams[tick_axis+'.labelsize'] = 'x-small'

# ---- one date list per analysis cycle hour (00, 06, 12, 18), each built
#      from 20190901 and 20200423 at that hour.  The third daterange
#      argument (24) is presumably the increment in hours -- confirm
#      against dateutils.

date_list_nsst_00, date_list_nsst_06, date_list_nsst_12, date_list_nsst_18 = [
    daterange('20190901'+chour, '20200423'+chour, 24)
    for chour in ('00', '06', '12', '18')]
ndates = len(date_list_nsst_00)

# ---- yearly SST and ice-concentration file names.

infile_2019 = 'nsst/sst.day.mean.2019.nc'
infile_ice_2019 = 'nsst/icec.day.mean.2019.nc'
infile_2020 = 'nsst/sst.day.mean.2020.nc'
infile_ice_2020 = 'nsst/icec.day.mean.2020.nc'

# ---- dates (yyyymmdd) and cycle hours for which no NSST file is found.

missing_list_yyyymmdd = []
missing_list_hour = []

# ---- read in the grib files of the NSST analyses, together with the OI SST
#      and ice-concentration fields for the same dates.
#
#      NOTE: the work arrays are allocated when the first 00 UTC file is
#      read; if that file is absent, the masking in the missing-file branch
#      below fails with a NameError.

cycle_sets = [('00', date_list_nsst_00), ('06', date_list_nsst_06), \
    ('12', date_list_nsst_12), ('18', date_list_nsst_18)]

for iset, (cycle, date_list) in enumerate(cycle_sets):

    for idate, date in enumerate(date_list):

        yy = date[0:4]
        yyyymmdd = date[0:8]
        infile = 'nsst/'+yyyymmdd+'_gdas.t'+cycle+'z.pgrb2.0p25.f000'

        if path.exists(infile):

            sstfile = pygrib.open(infile)
            grb = sstfile.select(shortName = 't')[0]
            sst = grb.values
            if idate == 0 and iset == 0:
                # ---- first file: get the grid and allocate the work
                #      arrays.  The OI arrays are given one fewer latitude
                #      row than the NSST grid.
                lats, lons = grb.latlons()
                nlats, nlons = np.shape(lats)
                nsst_save = ma.zeros((ndates,nlats,nlons), dtype=np.float32)
                OI_save = ma.zeros((ndates,nlats-1,nlons), dtype=np.float32)
                icec_save = ma.zeros((ndates,nlats-1,nlons), dtype=np.float32)
                nsst_mean_allcycles = np.zeros((4, nlats, nlons), dtype=np.float32)
            sstfile.close()
            nsst_save[idate,:,:] = sst[:,:]

            # ---- choose the yearly OI files; any year other than 2019
            #      uses the 2020 files.

            if yy == '2019':
                infile_sst, infile_ice = infile_2019, infile_ice_2019
            else:
                infile_sst, infile_ice = infile_2020, infile_ice_2020

            yyyy = int(date[0:4])
            mm = int(date[4:6])
            dd = int(date[6:8])
            julday = dayofyear(yyyy,mm,dd) - 1 # so day 1 = 0 for python index

            nc = Dataset(infile_sst)
            sstin = nc.variables['sst'][julday,:,:]
            if idate == 0:
                # OI grid coordinates, used later for the interpolation.
                lon_OI = nc.variables['lon'][:]
                lat_OI = nc.variables['lat'][:]
            nc.close()
            OI_save[idate,:,:] = sstin[:,:]

            nc = Dataset(infile_ice)
            icein = nc.variables['icec'][julday,:,:]
            nc.close()
            icec_save[idate,:,:] = icein[:,:]

        else:

            # ---- no NSST file for this date and cycle: record it and mask
            #      the date in every saved array.  icec_save is masked too,
            #      so that stale ice values are not averaged in.

            missing_list_yyyymmdd.append(str(yyyymmdd))
            missing_list_hour.append(cycle)
            print ('unable to read ', infile)
            nsst_save[idate,:,:] = ma.masked
            OI_save[idate,:,:] = ma.masked
            icec_save[idate,:,:] = ma.masked

    # ---- mean over all dates for this cycle.

    nsst_mean_allcycles[iset,:,:] = ma.mean(nsst_save,axis=0)
        
# ---- average the four cycle means, and average the OI SST and ice
#      concentration over all dates.  The OI mean is then masked wherever
#      the mean ice concentration exceeds zero or the OI mean is itself
#      masked.

nsst_mean_overall = np.mean(nsst_mean_allcycles, axis=0)
sst_OI_mean = ma.mean(OI_save, axis=0)
icec_OI_mean = ma.mean(icec_save, axis=0)
ice_or_missing = ma.logical_or(icec_OI_mean>0, sst_OI_mean.mask == True)
sst_OI_mean = ma.masked_where(ice_or_missing, sst_OI_mean)

# ---- flip the NSST field and its coordinates top to bottom; need
#      latitudes in ascending order.

nsst_mean_overall = np.flipud(nsst_mean_overall)
lons = np.flipud(lons)
lats = np.flipud(lats)
print('OI max, min = ', ma.max(sst_OI_mean), ma.min(sst_OI_mean))
print('nsst_mean_overall max, min = ', ma.max(nsst_mean_overall), ma.min(nsst_mean_overall))

# ---- interpolate the nsst data to the OI grid, then subtract 273.15
#      (presumably Kelvin to deg C -- confirm the grib units).

lon_OI_2D, lat_OI_2D = np.meshgrid(lon_OI, lat_OI)
nsst_mean_OIgrid = interp(
    nsst_mean_overall, lons[0,:], lats[:,0], lon_OI_2D, lat_OI_2D,
    checkbounds=False, masked=False, order=1) - 273.15

# ---- NSST minus OI difference, the field that is plotted.

sst_difference = nsst_mean_OIgrid - sst_OI_mean

# ---- plot the NSST - OI mean difference on a Miller-projection map
#      covering 75S to 75N.

fig = plt.figure(figsize=(9.,5.6))
ax = fig.add_axes([0.07,0.11,0.9,0.82])
ax.set_title('Global differences, NSST - OI SST, 1 Sep 2019 to 22 April 2020')

# ---- contour levels and their fill colors, blue through white to red.

clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
colorst = [
    '#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff',
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
colorstblack='Black'

# ---- latitude / longitude lines to draw and label.

parallels = np.arange(-80.,90,20.)
meridians = np.arange(0.,360.,20.)

m = Basemap(
    projection='mill', resolution='l',
    llcrnrlon=lon_OI[0], llcrnrlat=-75,
    urcrnrlon=lon_OI[-1], urcrnrlat=75.)
x, y = m(lon_OI_2D, lat_OI_2D)

CS2 = m.contourf(x,y,sst_difference,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.5,color='Gray')
m.drawcountries(linewidth=0.3,color='Gray')
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)

# ---- use axes_grid toolkit to make colorbar axes below the map.

divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="3%", pad=0.35)
cb = plt.colorbar(
    CS2, orientation='horizontal', cax=cax,
    drawedges=True, ticks=clevs, format='%g')
cb.set_label('Difference (deg C)')

# ---- save the figure.

plot_title = 'NSST_vs_OI_difference.png'
fig.savefig(plot_title, dpi=300)
print('saving plot to file = ',plot_title)
print('Done!')

# ---- write the missing dates and cycles, one "'yyyymmdd', 'hh'" pair per
#      line.  The with-block closes the file even if a write fails.

with open('nsst/missing_dates_and_cycles.txt', 'w') as ouf:
    for listitem1, listitem2 in zip(missing_list_yyyymmdd, missing_list_hour):
        ouf.write("'{0}', '{1}'\n".format(listitem1,listitem2))

print ('wrote to nsst/missing_dates_and_cycles.txt')


                                                                                                                                                                                                                                                                                                                                                                                                                                                                    ._compare_sst_v2.py                                                                                 000775  000765  000024  00000000414 14016263145 014503  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    X    99:22002                                                                                                                                                                                                                                                    compare_sst_v2.py                                                                                   000775  000765  000024  00000013473 14016263145 014277  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         from netCDF4 
import Dataset
import numpy as np
from dateutils import daterange, datetohrs, dayofyear
import sys
import pygrib
import os
import os.path
from os import path
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

# ---- use small tick labels on both plot axes.

for tick_axis in ('xtick', 'ytick'):
    rcParams[tick_axis+'.labelsize'] = 'x-small'

# ---- one date list per analysis cycle hour (00, 06, 12, 18), each built
#      from 20190901 and 20200423 at that hour.  The third daterange
#      argument (24) is presumably the increment in hours -- confirm
#      against dateutils.

date_list_nsst_00, date_list_nsst_06, date_list_nsst_12, date_list_nsst_18 = [
    daterange('20190901'+chour, '20200423'+chour, 24)
    for chour in ('00', '06', '12', '18')]
ndates = len(date_list_nsst_00)

# ---- yearly SST and ice-concentration file names.

infile_2019 = 'nsst/sst.day.mean.2019.nc'
infile_ice_2019 = 'nsst/icec.day.mean.2019.nc'
infile_2020 = 'nsst/sst.day.mean.2020.nc'
infile_ice_2020 = 'nsst/icec.day.mean.2020.nc'

# ---- dates (yyyymmdd) and cycle hours for which no NSST file is found.

missing_list_yyyymmdd = []
missing_list_hour = []

# ---- read in the grib files of the NSST analyses; during the first
#      (00 UTC) pass also read the per-date OI SST file.
#
#      NOTE: the work arrays are allocated when the first 00 UTC file is
#      read; if that file is absent, the masking in the missing-file branch
#      below fails with a NameError.

cycle_sets = [('00', date_list_nsst_00), ('06', date_list_nsst_06), \
    ('12', date_list_nsst_12), ('18', date_list_nsst_18)]

for iset, (cycle, date_list) in enumerate(cycle_sets):

    for idate, date in enumerate(date_list):

        yy = date[0:4]
        yyyymmdd = date[0:8]
        infile = 'nsst/'+yyyymmdd+'_gdas.t'+cycle+'z.pgrb2.0p25.f000'

        if path.exists(infile):

            sstfile = pygrib.open(infile)
            grb = sstfile.select(shortName = 't')[0]
            sst = grb.values
            if idate == 0 and iset == 0:
                # ---- first file: get the grid and allocate the work
                #      arrays.  The OI arrays are given one fewer latitude
                #      row than the NSST grid.
                lats, lons = grb.latlons()
                nlats, nlons = np.shape(lats)
                ones = np.ones((nlats-1, nlons), dtype=np.float32)
                # zeros was previously filled with np.ones by mistake.
                zeros = np.zeros((nlats-1, nlons), dtype=np.float32)
                nsst_save = ma.zeros((ndates,nlats,nlons), dtype=np.float32)
                OI_save = ma.zeros((ndates,nlats-1,nlons), dtype=np.float32)
                icec_save = ma.zeros((ndates,nlats-1,nlons), dtype=np.float32)
                nsst_mean_allcycles = np.zeros((4, nlats, nlons), dtype=np.float32)
            sstfile.close()
            nsst_save[idate,:,:] = sst[:,:]

            if iset == 0:

                # ---- open the per-date OI file whose name is built and
                #      printed here.  The yearly 2019 file was opened
                #      before, so record 0 of that file was read for
                #      every date.

                infile_oi = 'nsst/'+yyyymmdd+'_oi_v2.nc'
                print (infile_oi)
                nc = Dataset(infile_oi)
                if idate == 0:
                    # OI grid coordinates, used later for the interpolation.
                    lon_OI = nc.variables['lon'][:]
                    lat_OI = nc.variables['lat'][:]
                sstin = nc.variables['sst'][0,:,:]
                print ('min, max OI sst = ', np.min(sstin), np.max(sstin))
                #icein = np.where(sstin < 0.0, ones, zeros)
                nc.close()
                OI_save[idate,:,:] = sstin[:,:]
                icec_save[idate,:,:] = zeros[:,:]

        else:

            # ---- no NSST file for this date and cycle: record it and mask
            #      the date in every saved array.

            missing_list_yyyymmdd.append(str(yyyymmdd))
            missing_list_hour.append(cycle)
            print ('unable to read ', infile)
            nsst_save[idate,:,:] = ma.masked
            OI_save[idate,:,:] = ma.masked
            icec_save[idate,:,:] = ma.masked

    # ---- mean over all dates for this cycle.

    nsst_mean_allcycles[iset,:,:] = ma.mean(nsst_save,axis=0)
        
# ---- average the four cycle means, and average the OI SST over all dates.
#      The ice-based masking is disabled in this version:
#icec_OI_mean = ma.mean(icec_save,axis=0)
#sst_OI_mean = ma.masked_where(ma.logical_or(icec_OI_mean>0, sst_OI_mean.mask == True), sst_OI_mean)

nsst_mean_overall = np.mean(nsst_mean_allcycles, axis=0)
sst_OI_mean = ma.mean(OI_save, axis=0)

# ---- flip the NSST field and its coordinates top to bottom; need
#      latitudes in ascending order.

nsst_mean_overall = np.flipud(nsst_mean_overall)
lons = np.flipud(lons)
lats = np.flipud(lats)
print('OI max, min = ', ma.max(sst_OI_mean), ma.min(sst_OI_mean))
print('nsst_mean_overall max, min = ', ma.max(nsst_mean_overall), ma.min(nsst_mean_overall))

# ---- interpolate the nsst data to the OI grid, then subtract 273.15
#      (presumably Kelvin to deg C -- confirm the grib units).

lon_OI_2D, lat_OI_2D = np.meshgrid(lon_OI, lat_OI)
nsst_mean_OIgrid = interp(
    nsst_mean_overall, lons[0,:], lats[:,0], lon_OI_2D, lat_OI_2D,
    checkbounds=False, masked=False, order=1) - 273.15

# ---- NSST minus OI difference, the field that is plotted.

sst_difference = nsst_mean_OIgrid - sst_OI_mean

# ---- plot the NSST - OI mean difference on a Miller-projection map
#      covering 75S to 75N.

fig = plt.figure(figsize=(9.,5.6))
ax = fig.add_axes([0.07,0.11,0.9,0.82])
# NOTE(review): the date range in this title starts after it ends, and
# the date lists in this script start at 20190901 -- confirm the intended
# wording.
ax.set_title('Global differences, NSST - OI SST, 1 Oct 2020 to 22 April 2020')

# ---- contour levels and their fill colors, blue through white to red.

clevs = [-2.0,-1.5,-1.0,-0.5,-0.25,0.25,0.5,1.0,1.5,2.0]
colorst = [
    '#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff',
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
colorstblack='Black'

# ---- latitude / longitude lines to draw and label.

parallels = np.arange(-80.,90,20.)
meridians = np.arange(0.,360.,20.)

m = Basemap(
    projection='mill', resolution='l',
    llcrnrlon=lon_OI[0], llcrnrlat=-75,
    urcrnrlon=lon_OI[-1], urcrnrlat=75.)
x, y = m(lon_OI_2D, lat_OI_2D)

CS2 = m.contourf(x,y,sst_difference,clevs,cmap=None,colors=colorst,extend='both')

m.drawcoastlines(linewidth=0.5,color='Gray')
m.drawcountries(linewidth=0.3,color='Gray')
m.drawparallels(parallels,labels=[1,0,0],linewidth=0.15,fontsize=8)
m.drawmeridians(meridians,labels=[0,0,0,1],linewidth=0.15,fontsize=8)

# ---- use axes_grid toolkit to make colorbar axes below the map.

divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="3%", pad=0.35)
cb = plt.colorbar(
    CS2, orientation='horizontal', cax=cax,
    drawedges=True, ticks=clevs, format='%g')
cb.set_label('Difference (deg C)')

# ---- save the figure.

plot_title = 'NSST_vs_OI_difference.png'
fig.savefig(plot_title, dpi=300)
print('saving plot to file = ',plot_title)
print('Done!')

# ---- write the missing dates and cycles, one "'yyyymmdd', 'hh'" pair per
#      line.  The with-block closes the file even if a write fails.

with open('nsst/missing_dates_and_cycles.txt', 'w') as ouf:
    for listitem1, listitem2 in zip(missing_list_yyyymmdd, missing_list_hour):
        ouf.write("'{0}', '{1}'\n".format(listitem1,listitem2))

print ('wrote to nsst/missing_dates_and_cycles.txt')


def compute_closest_member(nmembers_x25, ny, nx, nfour, \
    thresh_low, thresh_mod, thresh_high, ensmean, precip_ens, \
    precip_anal, closest_histogram):

    """
    for every grid point, find the ensemble member closest to the
    analyzed value, determine that member's rank among the other
    members, and add one count to closest_histogram for that rank,
    in the column selected by the ensemble-mean amount.

    nmembers_x25 : number of members examined (first index of precip_ens)
    ny, nx : number of grid points; fields are indexed [jy,ix]
    nfour : not used in this function
    thresh_low, thresh_mod, thresh_high : ensemble-mean thresholds.
        column 0 is used when the mean < thresh_low, column 1 when
        thresh_low <= mean < thresh_mod, column 2 when
        thresh_mod <= mean < thresh_high, and column 3 otherwise.
    ensmean : ensemble mean, indexed [jy,ix]
    precip_ens : ensemble values, indexed [imem,jy,ix]
    precip_anal : analyzed values, indexed [jy,ix]
    closest_histogram : counts indexed [rank,column]; ranks start at 1
        (number of other members below the closest member, plus one).
        Updated in place.

    returns closest_histogram, istat.  istat = -1, with the histogram
    untouched, when the minimum of precip_ens or of precip_anal is
    negative; otherwise istat = 0.
    """

    import numpy as np

    # --- determine if there is any corrupt ensemble members, and if so, don't
    #     tally stats for this day.

    rm = np.min(precip_ens)
    rma = np.min(precip_anal)
    if rm < 0.0 or rma < 0.0:
        istat = -1
        print ('rm, rma = ', rm, rma)
        print ('identified bad forecast or analysis data, so skip this day.')
        return closest_histogram, istat

    istat = 0
    for ix in range(nx):
        for jy in range(ny):

            # --- determine which member is closest to the analyzed; the
            #     first member with the smallest difference wins.

            a = precip_anal[jy,ix]
            iclosest = 1
            rclosest = 9999.
            eclosest = 0.0
            for imem in range(nmembers_x25):
                e = precip_ens[imem,jy,ix]
                diff = np.abs(e-a)
                if diff < rclosest and e > -99:
                    rclosest = diff
                    eclosest = e
                    iclosest = imem

            # --- count how many of the other members are below, and how
            #     many are equal to, the closest member's value.

            ibelow = 0
            iequal = 0
            for imem in range(nmembers_x25):
                if imem == iclosest:
                    continue
                e = precip_ens[imem,jy,ix]
                if e < eclosest: ibelow = ibelow + 1
                if e == eclosest: iequal = iequal + 1

            # --- determine the closest_histogram rank.  This is done once
            #     per grid point, after the counting loop has finished.
            #     It was previously nested inside that loop, so it ran once
            #     per member and overwrote iclosest while the loop was
            #     still comparing against it.

            if iequal == 0:
                irank = ibelow + 1
            else: # with equals, randomly assign rank
                # NOTE(review): int(r) lies in 0..iequal-1, so the highest
                # tied rank (ibelow + iequal + 1) is never chosen -- confirm
                # whether uniform()*(iequal+1) was intended.
                r = np.random.uniform()*iequal
                ir = min(int(r), iequal)
                irank = ibelow + ir + 1

            # --- choose the histogram column from the ensemble mean.

            em = ensmean[jy,ix]
            if em < thresh_low:
                icat = 0
            elif em >= thresh_low and em < thresh_mod:
                icat = 1
            elif em >= thresh_mod and em < thresh_high:
                icat = 2
            else:
                icat = 3
            closest_histogram[irank,icat] = closest_histogram[irank,icat] + 1

    return closest_histogram, istat
     
                                                                                                                                                                                                                                                                                                                                                                                      ._compute_mean_precip_and_plot.py                                                                   000775  000765  000024  00000000416 14030401656 017452  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    ژY    148:322760                                                                                                                                                                                                                                                  compute_mean_precip_and_plot.py                                                                     000775  000765  000024  00000011645 14030401656 017243  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
compute_mean_precip_and_plot.py cyyyymmddhh cleadb cleade

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cf
import _pickle as cPickle
import scipy.stats as stats
import cartopy.io.shapereader as shpreader

# ---- plot appearance.

for tick_axis in ('xtick', 'ytick'):
    rcParams[tick_axis+'.labelsize'] = 'medium'
rcParams['legend.fontsize'] = 'large'

# ---- command-line arguments: a yyyymmddhh date string, then the first
#      and last lead, each given with three digits (018 not 18).

cyyyymmddhh = sys.argv[1]
cleadb, cleade = sys.argv[2], sys.argv[3]
ileadb, ileade = int(cleadb), int(cleade)

# ---- year and month pieces of the date string; ccmonth is the
#      three-letter month name.

cyear = cyyyymmddhh[0:4]
cmonth = cyyyymmddhh[4:6]
imonth = int(cmonth)-1
cmonths = [
    'Jan','Feb','Mar','Apr','May','Jun',
    'Jul','Aug','Sep','Oct','Nov','Dec']
ccmonth = cmonths[imonth]

# ---- directory holding the pickled ensemble files.

master_directory = '/Volumes/NBM/conus_gefsv12/qmapped/'

# ---- county and state outlines, read from shapefiles.

reader = shpreader.Reader('countyl010g_shp_nt00964/countyl010g.shp')
counties = list(reader.geometries())
COUNTIES = cf.ShapelyFeature(counties, ccrs.PlateCarree())

reader = shpreader.Reader('statesl010g/statesp010g.shp')
states = list(reader.geometries())
STATES = cf.ShapelyFeature(states, ccrs.PlateCarree())


# ---- sum the raw and qmapped ensemble fields over the requested leads,
#      stepping 6 at a time from ileadb through ileade, then average over
#      the first array axis.

precip_ens_raw = None
precip_ens_qmapped = None

for ilead in range(ileadb, ileade+1, 6):

    clead = '{0:03d}'.format(ilead) # zero-padded to three digits, 018 not 18

    # ---- read ensemble data.  NOTE(review): unpickling runs whatever
    #      code the file encodes, so only read files from a trusted source.
    #      The with-block closes the file even if a load fails.

    infile = master_directory + cyyyymmddhh+'_use99_lead='+clead+'.cPick'
    print ('reading raw and qmapped ens from ', infile)
    with open(infile,'rb') as inf:
        precip_ens_raw_ndfd = cPickle.load(inf)
        precip_ens_qmapped_ndfd = cPickle.load(inf)
        lons_ndfd = cPickle.load(inf)
        lats_ndfd = cPickle.load(inf)

    if precip_ens_raw is None:
        # first lead read: start the running sums.
        precip_ens_raw = precip_ens_raw_ndfd
        precip_ens_qmapped = precip_ens_qmapped_ndfd
    else:
        precip_ens_raw = precip_ens_raw + precip_ens_raw_ndfd
        precip_ens_qmapped = precip_ens_qmapped + precip_ens_qmapped_ndfd

if precip_ens_raw is None:
    # an empty lead range would otherwise surface as a NameError below.
    raise ValueError('no leads to process: cleadb = '+cleadb+', cleade = '+cleade)

precip_raw_mean = np.mean(precip_ens_raw, axis=0)
precip_qmapped_mean = np.mean(precip_ens_qmapped, axis=0)

# ---- plot the stamp map: contour levels and the fill colors used for them.

clevs = [0, 1, 3, 5, 10, 15, 20, 25, 30, 40, 50, 65, 80, 100.0]
colorst = [
    'White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']

# ---- plot the storm total ens mean over the Rockies.  The active domain
#      is Colorado (4.5 deg of latitude by 9.1 deg of longitude); a wider
#      alternative is kept here, commented out:
#latb = 33.
#late = 47.
#lonb = -117.
#lone = -101.9

latb, late = 36.7, 41.2
lonb, lone = -110., -100.9

xdim = 6.
ydim = 4.8
drawcoasts = False

proj = ccrs.LambertConformal(
    central_latitude = (latb+late)/2.,
    central_longitude = (lonb+lone)/2,
    standard_parallels = (latb, late))

# ---- one figure per field: (field, figure title, output file name).

panels = [
    (precip_raw_mean,
        'Raw ensemble-mean forecast storm-total meltwater',
        'storm_total_raw_Colorado.png'),
    (precip_qmapped_mean,
        'Statistically modified forecast storm-total meltwater',
        'storm_total_qmapped_Colorado.png')]

for precip_mean, title, plot_title in panels:

    fig = plt.figure(figsize=(xdim, ydim))
    axloc = [0.02,0.14,0.96,0.78]
    ax = plt.axes(axloc,projection = proj)
    ax.set_extent([lonb,lone,latb,late])
    ax.coastlines(resolution='50m',lw=0.5)
    ax.add_feature(COUNTIES, facecolor='none', edgecolor='gray',lw=0.13)
    ax.add_feature(cf.BORDERS,lw=0.5)
    ax.add_feature(STATES, facecolor='none', edgecolor='black',lw=0.5)

    ax.set_title(title, fontsize=15,color='Black')
    CS = ax.contourf(
        lons_ndfd, lats_ndfd, precip_mean, clevs,
        cmap=None, colors=colorst, extend='both',
        transform=ccrs.PlateCarree())

    # ---- horizontal colorbar in its own axes under the map.

    cax = fig.add_axes([0.02,0.11,0.96,0.02])
    cb = plt.colorbar(
        CS, orientation='horizontal', cax=cax,
        drawedges=True, ticks=clevs, format='%g')
    cb.ax.tick_params(labelsize=9)
    cb.set_label('precipitation amount (mm)', fontsize=11)

    fig.savefig(plot_title, dpi=400)
    plt.close()
    print('saving plot to file = ',plot_title)

              
                










                                                                                           ._contour_localization_error.py                                                                     000775  000765  000024  00000000413 14016263145 017226  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    2[    27:3481                                                                                                                                                                                                                                                     contour_localization_error.py                                                                       000775  000765  000024  00000005464 14016263145 017024  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         import csv
import sys
import numpy as np
from datetime import datetime
import os
import matplotlib.pyplot as plt
from matplotlib import rcParams 
# ---- use small tick labels on both plot axes.

for tick_axis in ('xtick', 'ytick'):
    rcParams[tick_axis+'.labelsize'] = 'x-small'

# ---- command-line arguments: the lead used in the file names, and
#      the season tag ('cold' files carry no season suffix).

clead = sys.argv[1]
warm_or_cold = sys.argv[2]

cpath_errorstats = '/Volumes/Backup Plus/python/fcst_stats/'

# ---- localization length scales; their string forms appear in the
#      statistics file names.

random_localizations = np.array([200.0, 400.0, 600.0, 800.0])
bias_localizations = np.array([400.0, 600.0, 800.0, 1000.0, 1200.0, 1400.0, 1600.0])

# ---- declare arrays, indexed [random localization, bias localization].

rmse_array = np.zeros((4,7), dtype=np.float32)
bias_array = np.zeros((4,7), dtype=np.float32)

# ---- read one statistics file per pair of length scales; entry 2 of
#      the file is stored as the RMSE and entry 0 as the bias.

if warm_or_cold == 'cold':
    file_suffix = '.txt'
else:
    file_suffix = '_'+warm_or_cold+'.txt'

for ktrr, efold_random in enumerate(random_localizations):
    for ktrb, efold_bias in enumerate(bias_localizations):
        statsfile = cpath_errorstats + '2018_KF_forecast_errorstats_'+\
            clead+'h_flocal'+str(efold_random)+'_blocal'+str(efold_bias)+\
            file_suffix
        values = np.loadtxt(statsfile)
        rmse_array[ktrr,ktrb] = values[2]
        bias_array[ktrr,ktrb] = values[0]

# ----- make plots: two side-by-side contour panels, RMSE and bias, as
#       functions of the two localization length scales.

print('min, max rmse = ', np.min(rmse_array), np.max(rmse_array))
print('min, max bias = ', np.min(bias_array), np.max(bias_array))

fig = plt.figure(figsize=(6.5,3.4))

#clevels = [-0.5,-0.4,-0.3,-0.2,-0.1,0.1,0.2,0.3,0.4,0.5]
colorst = [
    '#0000ff', '#6666ff', '#b2b2ff', '#e6e6ff',
    'White', '#ffe6e6', '#ffb2b2', '#ff7373', '#ff0000']
fig.suptitle(clead+'-h forecasts, '+warm_or_cold+' season',fontsize=14)

# ----- (axes rectangle, panel title, array to contour) for each panel.

panels = [
    ([.11,.17,.37,.67], '(a) RMSE', rmse_array),
    ([.61,.17,.37,.67], '(b) Bias', bias_array)]

for panel_loc, panel_title, stat_array in panels:
    a1 = fig.add_axes(panel_loc)
    a1.set_title(panel_title,fontsize=13)
    cf = a1.contour(
        bias_localizations, random_localizations, stat_array,
        colors = 'Black', linewidths=0.8)
    a1.clabel(cf, inline=1, fontsize=9)
    a1.set_ylabel('Random error localization\nlength scale (km)', fontsize=11)
    a1.set_xlabel('Bias estimate localization\nlength scale (km)', fontsize=11)

# ----- a shared colorbar is sketched here but disabled:
#cax = fig.add_axes([0.1,0.05,0.8,0.013])
#cb = fig.colorbar(cf, ticks=clevels,cax=cax, \
#    orientation='horizontal',drawedges=True,format='%g') # im1 "bottom", size="3%",
#   pad="1%",
#cb.set_label('Bias (deg C)',fontsize=6)
#cb.ax.tick_params(labelsize=6)

plot_title = 'localization_rmse_bias_2018'+warm_or_cold+'_'+clead+'h.pdf'
print('saving plot to file = ',plot_title)
plt.savefig(plot_title)
print('Plot done')



        
        
                                                                                                                                                                                                            ._control_CDF_fitting_ccpa_precip_spline.py                                                         000775  000765  000024  00000000406 14072637106 021344  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    h\    70                                                                                                                                                                                                                                                          control_CDF_fitting_ccpa_precip_spline.py                                                           000775  000765  000024  00000001220 14072637106 021122  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_fitting_ccpa_precip_spline.py 
import os, sys
#from CDF_fitting_ccpa_precip_spline_v2 import CDF_fitting_ccpa_precip_spline_v2
from CDF_fitting_ccpa_precip_spline_flexiknot import CDF_fitting_ccpa_precip_spline_flexiknot

# Driver settings: the fitting routine is invoked once for every
# (month, ending hour) pair formed from the two lists below.
cmonths = ['%02d' % imonth for imonth in range(1, 13)]      # '01' ... '12'
cend_hours = ['%02d' % ihour for ihour in range(0, 24, 6)]  # '00','06','12','18'

# Alternative (inactive) settings for running only a subset:
#cmonths = ['10']
#cend_hours = ['00']
#
#cmonths = ['03']
#cend_hours = ['00','06','12','18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                ._control_CDF_fitting_ccpa_precip_spline_00UTC.py                                                   000755  000765  000024  00000000411 14072662557 022221  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    h\    7:190                                                                                                                                                                                                                                                       control_CDF_fitting_ccpa_precip_spline_00UTC.py                                                     000755  000765  000024  00000001201 14072662557 022002  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_fitting_ccpa_precip_spline.py 
import os, sys
#from CDF_fitting_ccpa_precip_spline_v2 import CDF_fitting_ccpa_precip_spline_v2
from CDF_fitting_ccpa_precip_spline_flexiknot import CDF_fitting_ccpa_precip_spline_flexiknot

# Driver settings: all twelve months, but only the '00' ending hour.
cmonths = ['%02d' % imonth for imonth in range(1, 13)]  # '01' ... '12'
cend_hours = ['00']

# Alternative (inactive) settings for running only a subset:
#cmonths = ['10']
#cend_hours = ['00']
#
#cmonths = ['03']
#cend_hours = ['00','06','12','18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                               ._control_CDF_fitting_ccpa_precip_spline_06UTC.py                                                   000755  000765  000024  00000000411 14072662601 022215  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    h\    7:190                                                                                                                                                                                                                                                       control_CDF_fitting_ccpa_precip_spline_06UTC.py                                                     000755  000765  000024  00000001201 14072662601 021776  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # 
# control_CDF_fitting_ccpa_precip_spline_06UTC.py
import os, sys
#from CDF_fitting_ccpa_precip_spline_v2 import CDF_fitting_ccpa_precip_spline_v2
from CDF_fitting_ccpa_precip_spline_flexiknot import CDF_fitting_ccpa_precip_spline_flexiknot

# Driver settings: all twelve months, but only the '06' ending hour.
cmonths = ['%02d' % imonth for imonth in range(1, 13)]  # '01' ... '12'
cend_hours = ['06']

# Alternative (inactive) settings for running only a subset:
#cmonths = ['10']
#cend_hours = ['00']
#
#cmonths = ['03']
#cend_hours = ['00','06','12','18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                               ._control_CDF_fitting_ccpa_precip_spline_12UTC.py                                                   000755  000765  000024  00000000411 14072662622 022215  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    h\    7:190                                                                                                                                                                                                                                                       control_CDF_fitting_ccpa_precip_spline_12UTC.py                                                     000755  000765  000024  00000001201 14072662622 021776  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # 
# control_CDF_fitting_ccpa_precip_spline_12UTC.py
import os, sys
#from CDF_fitting_ccpa_precip_spline_v2 import CDF_fitting_ccpa_precip_spline_v2
from CDF_fitting_ccpa_precip_spline_flexiknot import CDF_fitting_ccpa_precip_spline_flexiknot

# Driver settings: all twelve months, but only the '12' ending hour.
cmonths = ['%02d' % imonth for imonth in range(1, 13)]  # '01' ... '12'
cend_hours = ['12']

# Alternative (inactive) settings for running only a subset:
#cmonths = ['10']
#cend_hours = ['00']
#
#cmonths = ['03']
#cend_hours = ['00','06','12','18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                               ._control_CDF_fitting_ccpa_precip_spline_18UTC.py                                                   000755  000765  000024  00000000411 14072662636 022230  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    h\    7:150                                                                                                                                                                                                                                                       control_CDF_fitting_ccpa_precip_spline_18UTC.py                                                     000755  000765  000024  00000001201 14072662636 022011  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # 
# control_CDF_fitting_ccpa_precip_spline_18UTC.py
import os, sys
#from CDF_fitting_ccpa_precip_spline_v2 import CDF_fitting_ccpa_precip_spline_v2
from CDF_fitting_ccpa_precip_spline_flexiknot import CDF_fitting_ccpa_precip_spline_flexiknot

# Driver settings: all twelve months, but only the '18' ending hour.
cmonths = ['%02d' % imonth for imonth in range(1, 13)]  # '01' ... '12'
cend_hours = ['18']

# Alternative (inactive) settings for running only a subset:
#cmonths = ['10']
#cend_hours = ['00']
#
#cmonths = ['03']
#cend_hours = ['00','06','12','18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_ccpa_precip_spline_flexiknot(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                               ._control_CDF_fitting_mswep_precip_spline.py                                                        000775  000765  000024  00000000410 14021765231 021557  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    v^    5:20                                                                                                                                                                                                                                                        control_CDF_fitting_mswep_precip_spline.py                                                          000775  000765  000024  00000001037 14021765231 021350  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # 
# control_CDF_fitting_mswep_precip_spline.py
import os, sys
from CDF_fitting_mswep_precip_spline_v2 import CDF_fitting_mswep_precip_spline_v2

# Active settings: month '03' only, combined with all four ending hours.
cmonths = ['03']
cend_hours = ['%02d' % ihour for ihour in range(0, 24, 6)]  # '00','06','12','18'

# Alternative (inactive) settings:
#cmonths = ['01','02','03','04','05','06','07','08','09','10','11','12']
#
#cmonths = ['02']
#cend_hours = ['06']
#
#cend_hours = ['18']

for cmonth in cmonths:
    for cend_hour in cend_hours:
        # No error trapping is active, so the first failing call stops
        # the whole script.  The returned status is stored in istat but
        # is not inspected here.
        istat = CDF_fitting_mswep_precip_spline_v2(cmonth, cend_hour)
        # Disabled handler (it would pair with a try: around the call
        # above), kept for reference:
        #except:
        #    print ('Didnt work! ', cmonth, cend_hour)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 ._control_CDF_spline_fitting_forecast_mean_precip.py                                                000775  000765  000024  00000000411 14016263146 023235  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    n._    8:120                                                                                                                                                                                                                                                       control_CDF_spline_fitting_forecast_mean_precip.py                                                  000775  000765  000024  00000001456 14016263146 023032  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        # control_CDF_spline_fitting_forecast_mean_precip.py 

import os, sys
from CDF_spline_fitting_forecast_precip_mean import CDF_spline_fitting_forecast_precip_mean

# Active settings: April through December (Jan-Mar are left out of the
# active list), every lead from '06' to '120' in steps of 6
# (presumably hours -- confirm against the fitting routine).
cmonths = ['Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
cleads = ['%02d' % ilead for ilead in range(6, 121, 6)]  # '06','12',...,'96','102',...,'120'
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#
#cmonths = ['Jan']
#cleads = ['06']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_mean(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                                                                                                                                                  ._control_CDF_spline_fitting_forecast_precip.py                                                     000775  000765  000024  00000000465 14072071065 022245  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       5                                      ATTR      5                           com.apple.lastuseddate#PS      (   
  com.macromates.bookmarks   2     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    _    ( '14:5' )160                                                                                                                                                                                                           control_CDF_spline_fitting_forecast_precip.py                                                       000775  000765  000024  00000001640 14072071065 022024  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_spline_fitting_forecast_precip.py 
import os, sys
from CDF_spline_fitting_forecast_precip_v4 import CDF_spline_fitting_forecast_precip_v4

# Active settings: a single month and a single lead.
cmonths = ['Oct']
cleads = ['024']
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#cmonths = ['Jan']
#
#cleads = ['006','012','018','024','030', '036','042','048','054','060', \
#          '066','072','078','084','090', '096','102','108','114','120', \
#          '126','132','138','144','150', '156','162','168','174','180', \
#          '186','192','198','204','210', '216','222','228','234','240']
#cleads = ['018']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                                ._control_CDF_spline_fitting_forecast_precip_AMJ.py                                                 000755  000765  000024  00000000465 14073342534 022735  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       5                                      ATTR      5                           com.apple.lastuseddate#PS      (   
  com.macromates.bookmarks   2     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    _    ( '14:5' )130                                                                                                                                                                                                           control_CDF_spline_fitting_forecast_precip_AMJ.py                                                   000755  000765  000024  00000001653 14073342534 022520  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_spline_fitting_forecast_precip.py 
import os, sys
from CDF_spline_fitting_forecast_precip_v4 import CDF_spline_fitting_forecast_precip_v4

# Active settings: April-June, every lead from '006' to '240' in steps
# of 6 (presumably hours -- confirm against the fitting routine).
cmonths = ['Apr', 'May', 'Jun']
cleads = ['%03d' % ilead for ilead in range(6, 241, 6)]  # '006','012',...,'240'
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#cmonths = ['Jan']
#cleads = ['024']
#cleads = ['018']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                     ._control_CDF_spline_fitting_forecast_precip_JAS.py                                                 000755  000765  000024  00000000465 14073342636 022746  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       5                                      ATTR      5                           com.apple.lastuseddate#PS      (   
  com.macromates.bookmarks   2     com.macromates.selectionRange      4     com.macromates.visibleIndex  `    _    ( '14:5' )130                                                                                                                                                                                                           control_CDF_spline_fitting_forecast_precip_JAS.py                                                   000755  000765  000024  00000001653 14073342636 022531  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_spline_fitting_forecast_precip.py 
import os, sys
from CDF_spline_fitting_forecast_precip_v4 import CDF_spline_fitting_forecast_precip_v4

# Active settings: July-September, every lead from '006' to '240' in
# steps of 6 (presumably hours -- confirm against the fitting routine).
cmonths = ['Jul', 'Aug', 'Sep']
cleads = ['%03d' % ilead for ilead in range(6, 241, 6)]  # '006','012',...,'240'
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#cmonths = ['Jan']
#cleads = ['024']
#cleads = ['018']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                     ._control_CDF_spline_fitting_forecast_precip_JFM.py                                                 000755  000765  000024  00000000467 14073342462 022744  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       7                                      ATTR      7                           com.apple.lastuseddate#PS      (   
  com.macromates.bookmarks   2     com.macromates.selectionRange      6     com.macromates.visibleIndex  `    _    ( '14:5' )8:310                                                                                                                                                                                                         control_CDF_spline_fitting_forecast_precip_JFM.py                                                   000755  000765  000024  00000001653 14073342462 022525  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_spline_fitting_forecast_precip.py 
import os, sys
from CDF_spline_fitting_forecast_precip_v4 import CDF_spline_fitting_forecast_precip_v4

# Active settings: January-March, every lead from '006' to '240' in
# steps of 6 (presumably hours -- confirm against the fitting routine).
cmonths = ['Jan', 'Feb', 'Mar']
cleads = ['%03d' % ilead for ilead in range(6, 241, 6)]  # '006','012',...,'240'
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#cmonths = ['Jan']
#cleads = ['024']
#cleads = ['018']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                     ._control_CDF_spline_fitting_forecast_precip_OND.py                                                 000755  000765  000024  00000000467 14073342677 022760  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2       7                                      ATTR      7                           com.apple.lastuseddate#PS      (   
  com.macromates.bookmarks   2     com.macromates.selectionRange      6     com.macromates.visibleIndex  `    _    ( '14:5' )16:20                                                                                                                                                                                                         control_CDF_spline_fitting_forecast_precip_OND.py                                                   000755  000765  000024  00000001653 14073342677 022541  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # control_CDF_spline_fitting_forecast_precip.py 
import os, sys
from CDF_spline_fitting_forecast_precip_v4 import CDF_spline_fitting_forecast_precip_v4

# Active settings: October-December, every lead from '006' to '240' in
# steps of 6 (presumably hours -- confirm against the fitting routine).
cmonths = ['Oct', 'Nov', 'Dec']
cleads = ['%03d' % ilead for ilead in range(6, 241, 6)]  # '006','012',...,'240'
cdomain = 'conus'

# Alternative (inactive) settings:
#cmonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \
#    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
#cmonths = ['Jan']
#cleads = ['024']
#cleads = ['018']

# One fitting call per (month, lead) pair.  Error trapping is disabled,
# so the first failing call stops the whole script; the returned status
# is stored in istat but is not inspected here.
for cmonth in cmonths:
    for clead in cleads:
        print (cmonth, clead)
        istat = CDF_spline_fitting_forecast_precip_v4(cmonth, clead, cdomain)
        # Disabled handler, kept for reference:
        #try: ... except:
        #    print ('Didnt work! ', cmonth, clead, cdomain)
                                                                                     ._control_KF_biascorr.py                                                                            000775  000765  000024  00000000416 14016263146 015504  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    `    140:164101                                                                                                                                                                                                                                                  control_KF_biascorr.py                                                                              000775  000765  000024  00000021252 14016263146 015270  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
control_KF_biascorr.py

controls sequential execution of a process for the potential
refinement of location- and date-dependent estimates of 2-m
temperature forecast bias using a more formal Kalman-filter
extension of the decaying-average bias correction.

The data used in this process are deterministic forecast data 
from the ECMWF prediction system in a grid encompassing the 
CONUS (125 W to 60 W, 20 N to 50 N, 1/2-degree grid spacing).
The 2-m temperature analysis produced in the ERA5 analysis
system on the same grid is used as the verification and 
training data.

The sequential process follows these steps:

1.  Apply a simple decaying-average bias correction independently, 
grid point by grid point.   Actually, the formulation of the 
decaying average bias correction is expressed as a Kalman-filter 
bias correction, strongly inspired by Dick Dee's article,

https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137

and in particular eqs. (37) - (39) therein.  However, the B_x and
B_beta matrices therein are diagonal, and thus effectively 
become the decaying-average bias correction.  Validate these 
forecasts.

2.  With the bias estimates produced, correct the time series 
of forecasts, and use this corrected time series of forecasts
to produce a covariance matrix of the bias-corrected forecast 
errors.   From data assimilation experience, e.g., 
https://psl.noaa.gov/people/tom.hamill/covlocal_mwr.pdf
the localization of the matrix improves its accuracy, so 
apply localization.   Return the localized forecast error
covariance model.

3. Recompute the Kalman filter bias correction with a
spatially correlated forecast-error covariance matrix from 
step 2, but still independent estimates of the 
bias-correction coefficients. Validate these forecasts.

4.  Estimate the error covariance matrix of the bias
correction coefficients from the time series of bias estimates
at every grid point.   As with forecast-error covariances,
apply localization and return the localized bias-correction
error covariance.

5.  Apply a full Kalman-filter bias correction with the
more carefully validated forecast- and bias-correction error
covariances. Validate these forecasts.

"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import _pickle as cPickle
from read_reanalysis_timeseries import read_reanalysis_timeseries
from read_forecast_timeseries import read_forecast_timeseries
from forecast_error_covariance import forecast_error_covariance
from verify_forecasts import verify_forecasts
from Bxmodel import Bxmodel
from Bbeta_model import Bbeta_model
from Kalman_filter_biascorrection import Kalman_filter_biascorrection
from numba import jit
import numpy as np

# --------------------------------------------------------------
def form_diagonal_matrix(npts, vary):
    """
    return vary times the npts x npts identity matrix (the identity
    is built as float64), i.e., a square matrix with vary on the
    diagonal.
    """
    identity = np.identity(npts, dtype=np.float64)
    return vary*identity
# --------------------------------------------------------------

def set_error_variances(clead):
    """
    return the pair (fcst_err_var, b_beta_var) of error variances
    for the forecast lead time clead, given as a string.

    Only clead == '24' is configured.  For any other value a message
    is printed and the program terminates via sys.exit with a
    nonzero status (raising SystemExit).
    """
    if clead != '24':
        print ('not ready to test this forecast lead time yet.')
        # ---- exit status 1 rather than the default 0, so that a calling
        #      shell or driver script can tell that the run did not succeed.
        sys.exit(1)
    fcst_err_var = 1.0
    b_beta_var = 0.16  # will yield approx 0.08 alpha coefficient if R=Bx=1.0
    return fcst_err_var, b_beta_var

# --------------------------------------------------------------

def initialize_date_lists(warm_or_cold, cyear, clead):
    """
    build the list of analysis (initial) dates and the matching list
    of forecast dates for one season of the year cyear.

    warm_or_cold: 'warm' selects cyear+'040100' through cyear+'093000'.
        Any other value selects the cold season, returned as the 
        cyear+'100100' - cyear+'123100' dates followed by the 
        cyear+'010100' - cyear+'033100' dates of the same cyear.
    cyear: year string prepended to the mmddhh strings above.
    clead: lead time string; int(clead) is the shift handed to 
        dateshift (presumably hours -- confirm in dateutils).

    returns date_list_anal, date_list_forecast
    """

    def shifted_dates(initial_dates):
        # ---- one forecast date per initial date, offset by the lead.
        return [dateshift(cdate, int(clead)) for cdate in initial_dates]

    def season_segment(begin_mmddhh, end_mmddhh):
        # ---- initial dates from daterange with an increment of 24,
        #      paired with their shifted forecast dates.
        initial_dates = daterange(cyear+begin_mmddhh, cyear+end_mmddhh, 24)
        return initial_dates, shifted_dates(initial_dates)

    if warm_or_cold == 'warm':
        return season_segment('040100', '093000')

    # ---- cold season: the Oct-Dec segment is placed ahead of the
    #      Jan-Mar segment in the returned lists.
    
    janmar_anal, janmar_fcst = season_segment('010100', '033100')
    octdec_anal, octdec_fcst = season_segment('100100', '123100')
    return octdec_anal + janmar_anal, octdec_fcst + janmar_fcst

# --------------------------------------------------------------   

# ---- various initialization

# ---- configuration for this run: year, forecast lead (as a string), 
#      variable name, and season.  iskip is the number of whole 24-h 
#      periods in the lead; it is handed to verify_forecasts below.

cyear = '2018'
clead = '24'
iskip = int(clead)//24
cvariable = '2t'
warm_or_cold = 'warm'

# ---- input/output directories.   Only cpath_era5, cpath_forecast, 
#      cpath_Bx, and cpath_Bbeta are referenced later in this script.

cpath_era5 = '/Users/Tom/python/ecmwf/'
cpath_forecast = '/Volumes/Backup Plus/ecmwf/'
cpath_biascorr_decay = '/Volumes/Backup Plus/ecmwf/biascorr/'
cpath_biascorr_afterBx = '/Volumes/Backup Plus/ecmwf/afterBx/'
cpath_biascorr_afterBbeta = '/Volumes/Backup Plus/ecmwf/afterBbeta/'
cpath_Bx = '/Volumes/Backup Plus/ecmwf/Bx/'
cpath_Bbeta = '/Volumes/Backup Plus/ecmwf/Bbeta/'

# ---- analysis-error variance, localization length scales passed to 
#      Bbeta_model (efold_bias) and Bxmodel (efold_random), and the 
#      exponent passed to both.

anal_err_var = 1.0
efold_bias = 1300. # km efolding length scale.
efold_random = 200. # km efolding length scale.
exponenty = 2.0
fcst_err_var, b_beta_var =  set_error_variances(clead)
date_list_anal, date_list_forecast = \
    initialize_date_lists(warm_or_cold, cyear, clead)    
ndates = len(date_list_forecast)

# ---- read the reanalysis time series on the dates specified.  Note that
#      we pass in the dates of the forecast valid time, as we wish to 
#      upload the analyses to compare against the forecasts at this time.

analyses_3d, lons, lats = read_reanalysis_timeseries(cpath_era5, \
    date_list_forecast)
nlats, nlons = np.shape(lons)
npts = nlats*nlons

# ---- read the forecast time series on the dates specified.   Pass in 
#      the lead time and the initial time of the analysis.

forecast_3d, lons, lats = read_forecast_timeseries(cpath_forecast, \
    date_list_anal, clead, cvariable)
    
# ---- form a simple diagonal estimate for the Bx and Bbeta error
#      covariance matrices, and likewise for the analysis-error
#      covariance R.   (npts was already set above; this repeats it.)

npts = nlons*nlats
Bx = form_diagonal_matrix(npts, fcst_err_var)
Bbeta = form_diagonal_matrix(npts, b_beta_var)
R = form_diagonal_matrix(npts, anal_err_var)

# ---- verify the raw forecasts, i.e., with no bias correction; the
#      bias estimate beta_3d passed in is all zeros.

beta_3d = np.zeros((ndates, nlats, nlons), dtype=np.float64)
rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
    analyses_3d, forecast_3d, beta_3d, iskip)
print ('raw rmse, bias, mae = ', rmse, bias, mae)

# ---- apply the Kalman filter configured as a simple decaying-average
#      bias correction, i.e., with the diagonal R, Bx, Bbeta from above.

print ('producing decaying average bias correction')
beta_3d = Kalman_filter_biascorrection(\
    npts, nlats, nlons, forecast_3d, analyses_3d, beta_3d, \
    date_list_forecast, R, Bx, Bbeta)
    
# ---- verify the decaying-average bias correction forecasts

rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
    analyses_3d, forecast_3d, beta_3d, iskip)
print ('decay avg rmse, bias, mae = ', rmse, bias, mae)

# ---- write the bias correction time series to file (currently disabled)

#bias_corr_3d, lons, lats = write_biascorrection_timeseries( \
#    cpath_biascorr_decay, date_list_anal, clead, cyear, \
#    warm_or_cold)

# ---- two passes of: re-estimate both covariance models from the 
#      current beta_3d, rerun the Kalman filter, and verify.  Each pass
#      starts from the beta_3d produced by the previous step.

for iter in range(2):

    print ('******** iteration = ',iter)
    # NOTE(review): `already` is passed through to Bxmodel and Bbeta_model;
    # presumably it flags that the covariance was previously computed
    # and can be reused -- confirm in those routines.
    already = False
    print ('calling Bxmodel')
    
    # ---- produce a more careful estimate of the bias-corrected forecast
    #      error covariance
    
    Bx_localized = Bxmodel(nlats, nlons, ndates, lats, lons, cyear, clead, \
        warm_or_cold, cpath_Bx, efold_random, exponenty, forecast_3d, \
        analyses_3d, beta_3d, already)
 
    # ---- formulate and localize model for covariance of bias-correction
    #      estimates
 
    print ('calling Bbeta_model')
    Bbeta_localized = Bbeta_model(nlats, nlons, ndates, npts, \
        cyear, clead, warm_or_cold, cpath_Bbeta, efold_bias, \
        exponenty, beta_3d, already)
 
    # ---- apply the Kalman filter with both the localized Bx and the
    #      localized Bbeta estimated above (R remains diagonal).
    
    print ('producing Kalman filter bias correction with improved Bx')
    beta_3d = Kalman_filter_biascorrection(\
        npts, nlats, nlons, forecast_3d, analyses_3d, beta_3d, \
        date_list_forecast, R, Bx_localized, Bbeta_localized)
    
    # ---- verify the forecasts corrected with the localized Bx, Bbeta

    rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
        analyses_3d, forecast_3d, beta_3d, iskip)
    print ('Localization, random and bias = ', efold_random, efold_bias,\
        'KF with Bx_localized, Bbeta_localized: rmse, bias, mae = ',\
        rmse, bias, mae)
        
print ('Finished!')
    
    

    
    
                                                                                                                                                                                                                                                                                                                                                          ._control_KF_biascorr_v2.py                                                                         000775  000765  000024  00000000416 14016263146 016113  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    da    214:546567                                                                                                                                                                                                                                                  control_KF_biascorr_v2.py                                                                           000775  000765  000024  00000023606 14016263146 015704  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
control_KF_biascorr_v2.py

controls sequential execution of a process for the potential
refinement of location- and date-dependent estimates of 2-m
temperature forecast bias using a more formal Kalman-filter
extension of the decaying-average bias correction.

The data used in this process are deterministic forecast data 
from the ECMWF prediction system in a grid encompassing the 
CONUS (-125 to -60 W, 20 to 50 N, 1/2-degree grid spacing).
The 2-m temperature analysis produced in the ERA5 analysis
system on the same grid is used as the verification and 
training data.

The sequential process follows these steps:

1.  Apply a simple decaying-average bias correction independently, 
grid point by grid point.   Actually, the formulation of the 
decaying average bias correction is expressed as a Kalman-filter 
bias correction, strongly inspired by Dick Dee's article,

https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137

and in particular eqs. (37) - (39) therein.  However, the B_x and
B_beta matrices therein are diagonal, and thus effectively 
become the decaying-average bias correction.  Validate these 
forecasts.

2.  With the bias estimates produced, correct the time series 
of forecasts, and use this corrected time series of forecasts
to produce a covariance matrix of the bias-corrected forecast 
errors.   From data assimilation experience, e.g., 
https://psl.noaa.gov/people/tom.hamill/covlocal_mwr.pdf
the localization of the matrix improves its accuracy, so 
apply localization.   Return the localized forecast error
covariance model.

3. Recompute the Kalman filter bias correction with a
spatially correlated forecast-error covariance matrix from 
step 2, but still independent estimates of the 
bias-correction coefficients. Validate these forecasts.

4.  Estimate the error covariance matrix of the bias
correction coefficients from the time series of bias estimates
at every grid point.   As with forecast-error covariances,
apply localization and return the localized bias-correction
error covariance.

5.  Apply a full Kalman-filter bias correction with the
more carefully validated forecast- and bias-correction error
covariances. Validate these forecasts.

"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import _pickle as cPickle
from read_reanalysis_timeseries import read_reanalysis_timeseries
from read_forecast_timeseries import read_forecast_timeseries
from forecast_error_covariance import forecast_error_covariance
from verify_forecasts import verify_forecasts
from Bxmodel import Bxmodel
from Bbeta_model import Bbeta_model
from Kalman_filter_biascorrection import Kalman_filter_biascorrection
from numba import jit
import numpy as np

# --------------------------------------------------------------
def form_diagonal_matrix(npts, vary):
    """
    return vary times the npts x npts identity matrix (the identity
    is built as float64), i.e., a square matrix with vary on the
    diagonal.
    """
    identity = np.identity(npts, dtype=np.float64)
    return vary*identity
# --------------------------------------------------------------

def set_error_variances(clead):
    """
    return the pair (fcst_err_var, b_beta_var) of error variances
    configured for the forecast lead time clead.

    clead: lead time as a string; one of '24', '48', '72', '96', '120'.

    raises ValueError if clead is not one of the configured lead
    times (previously this surfaced as an UnboundLocalError at the
    return statement).
    """
    # ---- lead time -> (forecast-error variance, bias-coefficient
    #      error variance)
    variances_by_lead = {
        '24': (1.0, 0.16),  # will yield approx 0.08 alpha coefficient if R=Bx=1.0
        '48': (2.0, 0.12),
        '72': (3.0, 0.08),
        '96': (4.0, 0.07),
        '120': (5.0, 0.06),
    }
    if clead not in variances_by_lead:
        raise ValueError('no error variances configured for lead time ' + \
            repr(clead) + '; expected one of ' + \
            ', '.join(variances_by_lead))
    fcst_err_var, b_beta_var = variances_by_lead[clead]
    return fcst_err_var, b_beta_var

# --------------------------------------------------------------

def initialize_date_lists(warm_or_cold, cyear, clead):
    """
    build the list of analysis (initial) dates and the matching list
    of forecast dates for one season of the year cyear.

    warm_or_cold: 'warm' selects cyear+'040100' through cyear+'093000'.
        Any other value selects the cold season, returned as the 
        cyear+'100100' - cyear+'123100' dates followed by the 
        cyear+'010100' - cyear+'033100' dates of the same cyear.
    cyear: year string prepended to the mmddhh strings above.
    clead: lead time string; int(clead) is the shift handed to 
        dateshift (presumably hours -- confirm in dateutils).

    returns date_list_anal, date_list_forecast
    """

    def shifted_dates(initial_dates):
        # ---- one forecast date per initial date, offset by the lead.
        return [dateshift(cdate, int(clead)) for cdate in initial_dates]

    def season_segment(begin_mmddhh, end_mmddhh):
        # ---- initial dates from daterange with an increment of 24,
        #      paired with their shifted forecast dates.
        initial_dates = daterange(cyear+begin_mmddhh, cyear+end_mmddhh, 24)
        return initial_dates, shifted_dates(initial_dates)

    if warm_or_cold == 'warm':
        return season_segment('040100', '093000')

    # ---- cold season: the Oct-Dec segment is placed ahead of the
    #      Jan-Mar segment in the returned lists.
    
    janmar_anal, janmar_fcst = season_segment('010100', '033100')
    octdec_anal, octdec_fcst = season_segment('100100', '123100')
    return octdec_anal + janmar_anal, octdec_fcst + janmar_fcst

# --------------------------------------------------------------   

# ---- various initialization

# ---- configuration shared by every lead time and season below.

cyear = '2018'
cvariable = '2t'

cpath_era5 = '/Users/Tom/python/ecmwf/'
cpath_forecast = '/Volumes/Backup Plus/ecmwf/'
cpath_Bx = '/Volumes/Backup Plus/ecmwf/Bx/'
cpath_Bbeta = '/Volumes/Backup Plus/ecmwf/Bbeta/'
cpath_errorstats = '/Users/Tom/python/fcst_stats/'

anal_err_var = 1.0
exponenty = 2.0
# NOTE(review): `already` is passed through to Bxmodel and Bbeta_model;
# presumably it flags that the covariance was previously computed and
# can be reused -- confirm in those routines.
already = False
    
for clead in ['24','48','72','96','120']:

    # ---- number of whole 24-h periods in this lead time, handed to
    #      verify_forecasts.  This must be recomputed for each lead; 
    #      computing it once ahead of the loop left it at the value 
    #      for the 24-h lead for every lead time.
    
    iskip = int(clead)//24
    
    for warm_or_cold in ['warm', 'cold']:
        
        print ('&&&&&&&&&&&&&&&&&&&&&&&& LEAD TIME = ',clead,' SEASON = ',\
             warm_or_cold,'  &&&&&&&&&&&&&&&&&&&&&&&&&')
    
        date_list_anal, date_list_forecast = \
            initialize_date_lists(warm_or_cold, cyear, clead)
        ndates = len(date_list_forecast)
        fcst_err_var, b_beta_var =  set_error_variances(clead)
    
        # ---- read the reanalysis time series on the dates specified.  Note that
        #      we pass in the dates of the forecast valid time, as we wish to 
        #      upload the analyses to compare against the forecasts at this time.

        analyses_3d, lons, lats = read_reanalysis_timeseries(cpath_era5, \
            date_list_forecast)
        nlats, nlons = np.shape(lons)
        npts = nlats*nlons
    
        # ---- form a simple diagonal estimate for the Bx and Bbeta error
        #      covariance matrices, and likewise for the analysis-error
        #      covariance R.

        Bx = form_diagonal_matrix(npts, fcst_err_var)
        Bbeta = form_diagonal_matrix(npts, b_beta_var)
        R = form_diagonal_matrix(npts, anal_err_var)
    
        # ---- read the forecast time series on the dates specified.   Pass in 
        #      the lead time and the initial time of the analysis.

        forecast_3d, lons, lats = read_forecast_timeseries(cpath_forecast, \
            date_list_anal, clead, cvariable)
      
        # ---- verify the raw forecasts, i.e., with no bias correction; the
        #      bias estimate beta_3d passed in is all zeros.

        beta_3d = np.zeros((ndates, nlats, nlons), dtype=np.float64)
        statsfile = cpath_errorstats + 'raw_forecast_errorstats_'+\
            clead+'h_'+warm_or_cold+'.txt'
        rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
            analyses_3d, forecast_3d, beta_3d, iskip, statsfile)
        print ('   raw forecast rmse, bias, mae = ', rmse, bias, mae)

        # ---- apply the Kalman filter configured as a simple decaying-average
        #      bias correction, i.e., with the diagonal R, Bx, Bbeta from above.

        beta_3d = Kalman_filter_biascorrection(\
            npts, nlats, nlons, forecast_3d, analyses_3d, beta_3d, \
            date_list_forecast, R, Bx, Bbeta)
    
        # ---- verify, save to file   
    
        statsfile = cpath_errorstats + 'decayavg_forecast_errorstats_'+\
            clead+'h_'+warm_or_cold+'.txt'
        rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
            analyses_3d, forecast_3d, beta_3d, iskip, statsfile)
        print ('   decay avg rmse, bias, mae = ', rmse, bias, mae)    
    
        # ---- keep the decaying-average estimate so that every 
        #      localization combination below starts from the same beta_3d.
        
        beta_3d_save = np.copy(beta_3d)
        
        #for efold_random in [50.0,100.0,150.0,200.0]:
        #    for efold_bias in [400.0, 600.0, 800.0, 1000.0, 1200.0, 1400.0, 1600.0]:
        for efold_random in [250.0,300.0]:
            for efold_bias in [1700.0, 2000.0, 2500.0]:
    
                # ---- produce a more careful estimate of the bias-corrected forecast
                #      error covariance
    
                beta_3d = np.copy(beta_3d_save)
                Bx_localized = Bxmodel(nlats, nlons, ndates, lats, lons, cyear, clead, \
                    warm_or_cold, cpath_Bx, efold_random, exponenty, forecast_3d, \
                    analyses_3d, beta_3d, already)
 
                # ---- formulate and localize model for covariance of bias-correction
                #      estimates
 
                print ('calling Bbeta_model')
                Bbeta_localized = Bbeta_model(nlats, nlons, ndates, npts, \
                    cyear, clead, warm_or_cold, cpath_Bbeta, efold_bias, \
                    exponenty, beta_3d, already)
 
                # ---- apply the Kalman filter with both the localized Bx and
                #      the localized Bbeta estimated above (R remains diagonal).
    
                print ('producing Kalman filter bias correction with improved Bx')
                beta_3d = Kalman_filter_biascorrection(\
                    npts, nlats, nlons, forecast_3d, analyses_3d, beta_3d, \
                    date_list_forecast, R, Bx_localized, Bbeta_localized)
    
                # ---- verify the forecasts corrected with the localized Bx, Bbeta.
                # NOTE(review): unlike the raw and decayavg file names above,
                # this name does not include warm_or_cold, so the warm and 
                # cold passes target the same file -- confirm this is intended.

                strr = str(efold_random)
                strb = str(efold_bias)
                statsfile = cpath_errorstats + 'KF_forecast_errorstats_'+\
                    clead+'h_flocal'+strr+'_blocal'+strb+'.txt'
                rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
                    analyses_3d, forecast_3d, beta_3d, iskip, statsfile)
                print ('   Localization, random and bias = ', efold_random, efold_bias,\
                    ' KF with Bx_localized, Bbeta_localized: rmse, bias, mae = ',\
                    rmse, bias, mae)
        
print ('Finished!')
    
    

    
    
                                                                                                                              ._control_brier_reliability.py                                                                      000664  000765  000024  00000000407 14062146136 017011  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    b    180                                                                                                                                                                                                                                                         control_brier_reliability.py                                                                        000664  000765  000024  00000002004 14062146136 016567  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ control_brier_reliability.py """
import os, sys

# ---- run calculate_brier_reliability.py once for every combination 
#      of data type, lead time, and year/month listed below.

types = ['thinned', 'upscaled']

# ---- full candidate set of lead times, retained for reference; only 
#      the subset assigned to cleads below is processed.

#cleads = ['006','012','018','024','030','036', \
#    '042','048','054','060','066','072','078',\
#    '084','090','096','102','108','114','120',\
#    '126','132','138','144','150','156','162',\
#    '168','174','180','186','192','198','204',\
#    '210','216','222','228','234','240']
cleads = ['024','072','120']
    
cyyyymms = ['201712','201801','201802','201803','201804',\
    '201805','201806','201807','201808','201809',\
    '201810','201811','201812','201901','201902',\
    '201903','201904','201905','201906','201907',\
    '201908','201909','201910','201911']

    
for ctype in types:
    for clead in cleads:
        for cyyyymm in cyyyymms:
            cmd = 'python calculate_brier_reliability.py '+ \
                cyyyymm + ' ' + clead + ' ' + ctype
            print (cmd)

            # ---- os.system reports a failed command through its return
            #      value rather than by raising an exception, so test the
            #      returned status.  A failure is reported and the loop
            #      carries on with the remaining combinations.
            
            istat = os.system(cmd)
            if istat != 0:
                print ('this didnt work!')
                        
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            ._control_decayavg_bias_corr.py                                                                     000775  000765  000024  00000000417 14016263146 017127  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    c    147:1024123                                                                                                                                                                                                                                                 control_decayavg_bias_corr.py                                                                       000775  000765  000024  00000014426 14016263146 016717  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        """
control_decayavg_bias_corr.py

controls sequential execution of a process for the potential
refinement of location- and date-dependent estimates of 2-m
temperature forecast bias using a more formal Kalman-filter
extension of the decaying-average bias correction.

The data used in this process are deterministic forecast data 
from the ECMWF prediction system in a grid encompassing the 
CONUS (-125 to -60 W, 20 to 50 N, 1/2-degree grid spacing).
The 2-m temperature analysis produced in the ERA5 analysis
system on the same grid is used as the verification and 
training data.

The sequential process follows these steps:

1.  Apply a simple decaying-average bias correction independently, 
grid point by grid point.   Actually, the formulation of the 
decaying average bias correction is expressed as a Kalman-filter 
bias correction, strongly inspired by Dick Dee's article,

https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.05.137

and in particular eqs. (37) - (39) therein.  However, the B_x and
B_beta matrices therein are diagonal, and thus effectively 
become the decaying-average bias correction.  Validate these 
forecasts.

2.  With the bias estimates produced, correct the time series 
of forecasts, and use this corrected time series of forecasts
to produce a covariance matrix of the bias-corrected forecast 
errors.   From data assimilation experience, e.g., 
https://psl.noaa.gov/people/tom.hamill/covlocal_mwr.pdf
the localization of the matrix improves its accuracy, so 
apply localization.   Return the localized forecast error
covariance model.

3. Recompute the Kalman filter bias correction with a
spatially correlated forecast-error covariance matrix from 
step 2, but still independent estimates of the 
bias-correction coefficients. Validate these forecasts.

4.  Estimate the error covariance matrix of the bias
correction coefficients from the time series of bias estimates
at every grid point.   As with forecast-error covariances,
apply localization and return the localized bias-correction
error covariance.

5.  Apply a full Kalman-filter bias correction with the
more carefully validated forecast- and bias-correction error
covariances. Validate these forecasts.

"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import _pickle as cPickle
from read_reanalysis_timeseries import read_reanalysis_timeseries
from read_forecast_timeseries import read_forecast_timeseries
from verify_forecasts import verify_forecasts
import numpy as np
from simple_decayavg_biascorrection import simple_decayavg_biascorrection


# --------------------------------------------------------------

def initialize_date_lists(warm_or_cold, cyear, clead):
    """
    build the list of analysis (initial) dates and the matching list
    of forecast dates for one season of the year cyear.

    warm_or_cold: 'warm' selects cyear+'040100' through cyear+'093000'.
        Any other value selects the cold season, returned as the 
        cyear+'100100' - cyear+'123100' dates followed by the 
        cyear+'010100' - cyear+'033100' dates of the same cyear.
    cyear: year string prepended to the mmddhh strings above.
    clead: lead time string; int(clead) is the shift handed to 
        dateshift (presumably hours -- confirm in dateutils).

    returns date_list_anal, date_list_forecast
    """

    def shifted_dates(initial_dates):
        # ---- one forecast date per initial date, offset by the lead.
        return [dateshift(cdate, int(clead)) for cdate in initial_dates]

    def season_segment(begin_mmddhh, end_mmddhh):
        # ---- initial dates from daterange with an increment of 24,
        #      paired with their shifted forecast dates.
        initial_dates = daterange(cyear+begin_mmddhh, cyear+end_mmddhh, 24)
        return initial_dates, shifted_dates(initial_dates)

    if warm_or_cold == 'warm':
        return season_segment('040100', '093000')

    # ---- cold season: the Oct-Dec segment is placed ahead of the
    #      Jan-Mar segment in the returned lists.
    
    janmar_anal, janmar_fcst = season_segment('010100', '033100')
    octdec_anal, octdec_fcst = season_segment('100100', '123100')
    return octdec_anal + janmar_anal, octdec_fcst + janmar_fcst

# --------------------------------------------------------------   

# ---- various initialization

# ---- configuration shared by every lead time and season below.

cyear = '2018'
cvariable = '2t'

cpath_era5 = '/Users/Tom/python/ecmwf/'
cpath_forecast = '/Volumes/Backup Plus/ecmwf/'
cpath_Bx = '/Volumes/Backup Plus/ecmwf/Bx/'
cpath_Bbeta = '/Volumes/Backup Plus/ecmwf/Bbeta/'
cpath_errorstats = '/Users/Tom/python/fcst_stats/'

already = False
    
for clead in ['24','48','72','96','120']:

    # ---- number of whole 24-h periods in this lead time, handed to
    #      verify_forecasts.  This must be recomputed for each lead; 
    #      computing it once ahead of the loop left it at the value 
    #      for the 24-h lead for every lead time.
    
    iskip = int(clead)//24
    
    for warm_or_cold in ['warm', 'cold']:
        
        print ('&&&&&&&&&&&&&&&&&&&&&&&& LEAD TIME = ',clead,' SEASON = ',\
             warm_or_cold,'  &&&&&&&&&&&&&&&&&&&&&&&&&')
    
        date_list_anal, date_list_forecast = \
            initialize_date_lists(warm_or_cold, cyear, clead)
        ndates = len(date_list_forecast)

    
        # ---- read the reanalysis time series on the dates specified.  Note that
        #      we pass in the dates of the forecast valid time, as we wish to 
        #      upload the analyses to compare against the forecasts at this time.

        analyses_3d, lons, lats = read_reanalysis_timeseries(cpath_era5, \
            date_list_forecast)
        nlats, nlons = np.shape(lons)
        npts = nlats*nlons
    
        # ---- read the forecast time series on the dates specified.   Pass in 
        #      the lead time and the initial time of the analysis.

        forecast_3d, lons, lats = read_forecast_timeseries(cpath_forecast, \
            date_list_anal, clead, cvariable)

        # ---- sweep the alpha value passed to the decaying-average routine.
        
        for alpha in [0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.10,0.11,0.12,0.13,0.14,0.15,0.16]:

            # ---- apply the simple decaying-average bias correction, 
            #      starting from an all-zero bias estimate for each alpha.

            beta_3d = np.zeros((ndates,nlats,nlons), dtype=np.float64)
            beta_3d = simple_decayavg_biascorrection(\
                nlats, nlons, forecast_3d, analyses_3d, beta_3d, \
                date_list_forecast, alpha)
                
            # ---- verify, save to file.
            # NOTE(review): the file name does not include alpha, so every
            # alpha in the sweep targets the same file -- confirm that 
            # verify_forecasts handles this as intended.
    
            statsfile = cpath_errorstats + 'simple_decayavg_forecast_errorstats_'+\
                clead+'h_'+warm_or_cold+'.txt'
            rmse, bias, mae = verify_forecasts(ndates, nlats, nlons, clead, \
                analyses_3d, forecast_3d, beta_3d, iskip, statsfile)
            print ('   alpha =  ',alpha,'  decay avg rmse, bias, mae = ', rmse, bias, mae)           
        
print ('Finished!')
    
    

    
    
                                                                                                                                                                                                                                              ._control_fcst_error_stats.py                                                                       000775  000765  000024  00000000411 14016263146 016701  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    w/d    7:310                                                                                                                                                                                                                                                       control_fcst_error_stats.py                                                                         000775  000765  000024  00000002352 14016263146 016472  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         import sys
from fcst_error_stats import fcst_error_stats

# ---- call fcst_error_stats for every combination of lead hour, 
#      stream, model, variable, and level listed here.

clevels = ['925','850','800','750','700','600','500','400','300','250','200','150','100','10']
chours = ['006','024','048','072','096','120']
# ---- alternative set of streams: ['1999','2003','2007','2011']
cstreams = ['2015']
cmodels = ['cfsr','reanalysis']
cvariables = ['T','U','V','Z']

for input_hour in chours:
    for input_stream in cstreams:
        for input_model in cmodels:
            for input_variable in cvariables:
                for input_level in clevels:
                    print (input_stream,input_model,input_variable,input_hour,input_level)
                    istat = fcst_error_stats(input_stream, input_model, \
                        input_variable, input_level, input_hour)
                        
#clevels = ['200']
#for input_stream in ['1999','2003','2007','2011']:
#    for input_model in ['reanalysis']:
#        for input_variable in ['T']:
#            for input_hour in ['006']:
#                for input_level in clevels:
#                    print (input_stream,input_model,input_variable,input_hour,input_level)
#                    istat = fcst_error_stats(input_stream, input_model, \
#                        input_variable, input_level, input_hour)                                                  
                        
print ('done')                                                                                                                                                                                                                                                                                      ._control_quantile_mapping_precip.py                                                                000775  000765  000024  00000000473 14016263146 020222  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2  	     ;                                      ATTR      ;     #                      com.apple.lastuseddate#PS      (   	  com.macromates.bookmarks   1     com.macromates.selectionRange      7     com.macromates.visibleIndex  `    Pe    ( '180' )310:459385                                                                                                                                                                                                     control_quantile_mapping_precip.py                                                                  000775  000765  000024  00000025570 14016263146 020012  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
control_quantile_mapping_precip.py cyyyymmddhh clead

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pygrib
from quantile_mapping_gamma_mixture_f90 import quantile_mapping_gamma_mixture_f90
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

import _pickle as cPickle
import scipy.stats as stats


# Matplotlib defaults shared by every figure made in this script.
for _rc_key, _rc_value in (('xtick.labelsize', 'medium'),
                           ('ytick.labelsize', 'medium'),
                           ('legend.fontsize', 'large')):
    rcParams[_rc_key] = _rc_value

# =====================================================================

def set_domain_boundaries(cdomain):
    """
    Return the hard-coded index bounds (jmin, jmax, imin, imax) for a
    named domain.

    Per the original author's note, the bounds were derived from the
    bounding lat/lon of the 2.5-km blend output grid and are indices
    into the 0.25-degree GEFSv12 reforecast grid that encompass the
    domain.

    cdomain : one of 'conus', 'pr', 'ak'.  Any other value prints a
        message and terminates the program through sys.exit().
    """
    # (domain name, (jmin, jmax, imin, imax))
    domain_table = (
        ('conus', (93, 246, 368, 686)),
        ('pr', (243, 256, 649, 667)),
        ('ak', (19, 161, 201, 967)),
    )
    for domain_name, bounds in domain_table:
        if cdomain == domain_name:
            jmin, jmax, imin, imax = bounds
            return jmin, jmax, imin, imax

    print ('invalid domain.  Exiting.')
    sys.exit()
    
# =====================================================================

def fraczero_possamps(nsamps, precip):
    """
    Split a 1-D precipitation sample into its zero fraction and its
    sorted positive amounts.

    Samples <= 0.0 are counted as zero.  Each remaining sample gets a
    uniform random perturbation in [-0.005, 0.005) so that, as the
    original author notes, discretized data do not produce many tied
    amounts when CDFs are built later.  The perturbed values are then
    sorted in ascending order.

    nsamps : not used by this routine (kept for the callers' signature).
    precip : array of precipitation amounts.

    Returns (fraction_zero, precip_nonzero, nz): the fraction of samples
    that were <= 0.0, the sorted perturbed positive amounts, and how
    many of them there are.
    """
    dry_indices = np.where(precip <= 0.0)
    wet_samples = np.delete(precip, dry_indices)
    nz = len(wet_samples)

    # perturbation of the same magnitude as the data discretization
    jitter = np.random.uniform(low=-0.005, high=0.005, size=nz)
    precip_nonzero = np.sort(wet_samples + jitter)

    ntotal = len(precip)
    fraction_zero = float(ntotal - len(precip_nonzero)) / float(ntotal)
    return fraction_zero, precip_nonzero, nz


# =====================================================================

# --- read grib data on a single level

def read_gribdata(gribfilename, endStep):
    """
    Read the 'tp' (precipitation) message for one forecast step from a
    grib file.

    gribfilename : path of the grib file, opened with pygrib.
    endStep : value matched against the grib message key ``endStep``.

    Returns (istat, precip_realtime, lats_full, lons_full).  istat is 0
    when the message was read, in which case the other three items are
    ``grb.values`` and the two arrays returned by ``grb.latlons()``.
    istat is -1 when the file does not exist or when an IOError,
    ValueError or RuntimeError was raised while reading; the other three
    items are then None.
    """
    istat = -1
    # Defined up front so the return statement is valid on every path
    # (missing file or failed read).
    precip_realtime = None
    lats_full = None
    lons_full = None

    print (gribfilename, endStep)
    if not os.path.exists(gribfilename):
        print ('   read_gribdata: file not found ', gribfilename)
        return istat, precip_realtime, lats_full, lons_full

    fcstfile = None
    try:
        fcstfile = pygrib.open(gribfilename)
        grb = fcstfile.select(shortName='tp',endStep=endStep)[0]
        values = grb.values
        lats, lons = grb.latlons()
        # publish the results only once everything has been read
        precip_realtime, lats_full, lons_full = values, lats, lons
        istat = 0
    except IOError:
        print ('   IOError in read_gribdata reading ', \
            gribfilename, endStep)
    except ValueError:
        print ('   ValueError in read_gribdata reading ', \
            gribfilename, endStep)
    except RuntimeError:
        print ('   RuntimeError in read_gribdata reading ', \
            gribfilename, endStep)
    finally:
        # release the file handle whether or not the read succeeded
        if fcstfile is not None:
            fcstfile.close()
    return istat, precip_realtime, lats_full, lons_full


    
# =====================================================================

# ---- inputs from command line
#      argv[1] = cyyyymmddhh, date/time string; characters 4:6 are the month
#      argv[2] = clead, lead time string used in the file names below

if len(sys.argv) < 3:
    print ('usage: control_quantile_mapping_precip.py cyyyymmddhh clead')
    sys.exit(1)

nstride = 1
cyyyymmddhh = sys.argv[1]
clead = sys.argv[2]
cmonth = cyyyymmddhh[4:6]
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
if not cmonth.isdigit() or not 1 <= int(cmonth) <= 12:
    # without this check a month of '00' would index cmonths[-1] and
    # silently select 'Dec'
    print ('invalid month in cyyyymmddhh = ', cyyyymmddhh, '.  Exiting.')
    sys.exit(1)
imonth = int(cmonth)-1
ccmonth = cmonths[imonth]
cdomain = 'conus'
mswep_directory = '/Volumes/Backup Plus/mswep/'

# ---- pull the 2-D longitude and latitude arrays of the MSWEP grid from
#      a previously generated netCDF file; 360 is subtracted from the
#      longitudes after reading.

infile = ''.join([mswep_directory, '200001_on_ndfd_grid_6hourly.nc'])
nc = Dataset(infile)
lats_mswep = nc.variables['lats'][:,:]
lons_mswep = nc.variables['lons'][:,:] - 360.0
nc.close()
print ('mswep lons dtype = ',lons_mswep.dtype)
    
# ---- read the MSWEP fitted Gamma parameters from cPickle file.
#      The file name is built from the two-digit month (cmonth) and the
#      lead time string.  Objects must be loaded in the order they were
#      dumped, so Dn is read even though it is not used below.

data_directory = '/Volumes/Backup Plus/mswep/'
infile = data_directory + cmonth+'_conus'+\
    '_MSWEP_apcp_gamma_parameters_h'+clead+'.cPick'

print ('reading from ', infile)
# the with-block closes the file even if one of the loads fails
with open(infile, 'rb') as inf:
    weights_mswep = cPickle.load(inf)
    alpha_mswep = cPickle.load(inf)
    beta_mswep = cPickle.load(inf)
    fzero_mswep = cPickle.load(inf)
    Dn = cPickle.load(inf)
    nmixture_mswep = cPickle.load(inf)

# grid dimensions are taken from the 2-D nmixture array
ny_mswep, nx_mswep = np.shape(nmixture_mswep)
ncomponents = 3
print ('ny_mswep, nx_mswep = ',ny_mswep, nx_mswep)

# ---- read the GEFSv12 fitted Gamma parameters from cPickle file.
#      The file name is built from the month abbreviation (ccmonth), the
#      domain name and the lead time string.  Objects must be loaded in
#      the order they were dumped, so the three Dnstat arrays are read
#      even though they are not used below.

cdomain = 'conus'
gefs_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
infile = gefs_directory + ccmonth+'_'+cdomain+\
    '_apcp_gamma_parameters_h' + clead + '.cPick'
print ('reading from ', infile)
# the with-block closes the file even if one of the loads fails
with open(infile, 'rb') as inf:
    weights_gefsv12 = cPickle.load(inf)
    alpha_gefsv12 = cPickle.load(inf)
    beta_gefsv12 = cPickle.load(inf)
    fzero_gefsv12 = cPickle.load(inf)
    Dnstat1a_gefsv12 = cPickle.load(inf)
    Dnstat2a_gefsv12 = cPickle.load(inf)
    Dnstat3a_gefsv12 = cPickle.load(inf)
    nmixture_gefsv12 = cPickle.load(inf)

# ---- 1-D longitudes and latitudes of the GEFSv12 sub-grid covering the
#      chosen domain, plus their 2-D meshgrid counterparts.

jmin, jmax, imin, imax = set_domain_boundaries(cdomain)

ncfile = ''.join([gefs_directory, ccmonth, '_apcp_h', clead, '.nc'])
nc = Dataset(ncfile)
lats_1d_gefsv12 = nc.variables['lats_fcst'][jmin:jmax]
lons_1d_gefsv12 = nc.variables['lons_fcst'][imin:imax]
nc.close()

ny_gefsv12, nx_gefsv12 = len(lats_1d_gefsv12), len(lons_1d_gefsv12)
print ('lons_1d_gefsv12 dtype = ', lons_1d_gefsv12.dtype)
print ('ny_gefsv12, nx_gefsv12 = ', ny_gefsv12, nx_gefsv12)

lons_fcst_2d, lats_fcst_2d = np.meshgrid(lons_1d_gefsv12,lats_1d_gefsv12)

# ---- get the desired 2021 GEFSv12 forecast as grib file downloaded
#      from NOMADS server.  The forecast-hour suffix is zero-padded to
#      three digits, so '06' gives f006 (as before) and '120' gives f120
#      rather than f0120.

input_directory = '/Volumes/Backup Plus/gefsv12/2021/'
infile = input_directory + cyyyymmddhh + \
    '_gec00.t00z.pgrb2s.0p25.f' + clead.zfill(3)
endStep = int(clead)
istat, precip_realtime, lats_full, lons_full = \
    read_gribdata(infile, endStep)
if istat != 0:
    # nothing below can run without the forecast field
    print ('unable to read forecast from ', infile, '.  Exiting.')
    sys.exit(1)

# cut the full field down to the domain sub-grid
precip_realtime_conus = precip_realtime[jmin:jmax, imin:imax]
print ('precip_realtime_conus.dtype = ', precip_realtime_conus.dtype)
print ('shape precip_realtime_conus = ', np.shape(precip_realtime_conus))

# ---- report the shapes of the fitted-parameter arrays, then hand
#      everything to the routine imported from
#      quantile_mapping_gamma_mixture_f90 (a Fortran routine according
#      to the original author, used for speed).

for _label, _array in (('weights_mswep', weights_mswep),
                       ('alpha_mswep', alpha_mswep),
                       ('beta_mswep', beta_mswep),
                       ('fzero_mswep', fzero_mswep),
                       ('weights_gefsv12', weights_gefsv12),
                       ('alpha_gefsv12', alpha_gefsv12),
                       ('beta_gefsv12', beta_gefsv12),
                       ('fzero_gefsv12', fzero_gefsv12)):
    print ('np.shape(' + _label + ') = ', np.shape(_array) )

print (quantile_mapping_gamma_mixture_f90.__doc__)
qmapped_precip = np.zeros((ny_mswep, nx_mswep), dtype=np.float64)
ncomponents = 3

# positional arguments, grouped by origin, in the order the routine is
# called with: MSWEP fit and grid, GEFSv12 fit / forecast / grid, sizes.
_mswep_args = (weights_mswep, alpha_mswep, beta_mswep, fzero_mswep,
               lons_mswep, lats_mswep)
_gefsv12_args = (weights_gefsv12, alpha_gefsv12, beta_gefsv12,
                 fzero_gefsv12, precip_realtime_conus, lons_1d_gefsv12,
                 lats_1d_gefsv12)
_size_args = (ncomponents, ny_mswep, nx_mswep, ny_gefsv12, nx_gefsv12)
qmapped_precip = quantile_mapping_gamma_mixture_f90(
    *(_mswep_args + _gefsv12_args + _size_args))

        
# ---- plot the quantile-mapped forecast (qmapped_precip) on the MSWEP
#      grid, using a Lambert conformal map with fixed corner points.

m = Basemap(llcrnrlon=233.7234,llcrnrlat=19.229,
    urcrnrlon = 300.95782, urcrnrlat = 54.37279,\
    projection='lcc',lat_1=25.,lat_2=25.,lon_0=265.,\
    resolution ='l',area_thresh=1000.)
# map-projection coordinates of every MSWEP grid point
x, y = m(lons_mswep, lats_mswep)

# contour levels (labelled in mm on the colorbar) and the fill colors
clevs = [0.0,0.2,0.4,0.6,0.8,1,1.5,2,2.5,3,4,5,6,8,10,15,20]
colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']

fig = plt.figure(figsize=(9,6.5))
axloc = [0.02,0.1,0.96,0.84]
ax1 = fig.add_axes(axloc)
# lead time minus 6 h as a string; not referenced again in this section
cleadb = str(int(clead)-6)
title = 'Quantile mapped forecast, IC = '+cyyyymmddhh+' lead = '+clead+' h'
ax1.set_title(title, fontsize=14,color='Black')
# the map is drawn before the colorbar axes are added to the figure
CS2 = m.contourf(x, y, qmapped_precip, clevs,\
    cmap=None, colors=colorst, extend='both')

m.drawcoastlines(linewidth=0.8,color='Gray')
m.drawcountries(linewidth=0.8,color='Gray')
m.drawstates(linewidth=0.8,color='Gray')

# ---- add a dedicated axes along the bottom of the figure and draw a
#      horizontal colorbar in it.

cax = fig.add_axes([0.06,0.07,0.88,0.02])
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.ax.tick_params(labelsize=7)
cb.set_label('Precipitation (mm)',fontsize=9)

# ---- build the output file name (plot_title holds a file name, not a
#      figure title) and save the figure as a 300-dpi PNG.

plot_title = 'qmapped_precip_'+cyyyymmddhh+'_lead'+clead+'.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')






# ---- plot the GEFS control forecast amount (precip_realtime_conus) on
#      the GEFSv12 sub-grid, using a Miller projection whose corners are
#      taken from the sub-grid itself.
#      NOTE(review): the lower-left latitude comes from the last row and
#      the upper-right from the first row, which presumably means the
#      latitudes run north to south -- confirm.

m = Basemap(llcrnrlon=lons_fcst_2d[0,0],llcrnrlat=lats_fcst_2d[-1,-1],\
    urcrnrlon=lons_fcst_2d[-1,-1],urcrnrlat=lats_fcst_2d[0,0],\
    resolution='l', projection='mill')
# map-projection coordinates of every GEFSv12 sub-grid point
x, y = m(lons_fcst_2d, lats_fcst_2d)

# ---- fill colors and contour levels for the forecast amount

colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']

clevs = [0.0,0.2,0.4,0.6,0.8,1,1.5,2,2.5,3,4,5,6,8,10,15,20]
fig = plt.figure(figsize=(9,6.))
axloc = [0.02,0.1,0.96,0.8]
ax1 = fig.add_axes(axloc)
# lead time minus 6 h as a string; not referenced again in this section
cleadb = str(int(clead)-6)
title = 'Control forecast precipitation amount (mm) for '+clead+\
    '-h IC = '+cyyyymmddhh
ax1.set_title(title, fontsize=14,color='Black')
# the map is drawn before the colorbar axes are added to the figure
CS2 = m.contourf(x, y, precip_realtime_conus, clevs,\
    cmap=None, colors=colorst, extend='both')
m.drawcoastlines(linewidth=0.8,color='Gray')
m.drawcountries(linewidth=0.8,color='Gray')
m.drawstates(linewidth=0.8,color='Gray')

# ---- add a dedicated axes along the bottom of the figure and draw a
#      horizontal colorbar in it.

cax = fig.add_axes([0.02,0.07,0.96,0.02])
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.ax.tick_params(labelsize=7)
cb.set_label('Mean precipitation (mm)',fontsize=9)

# ---- build the output file name (plot_title holds a file name, not a
#      figure title) and save the figure as a 300-dpi PNG.

plot_title = 'forecast_precip_'+clead+'_h_IC'+cyyyymmddhh+'.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')


                                                                                                                                        ._control_quantile_mapping_precip_v2.py                                                             000775  000765  000024  00000000415 14016263146 020625  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    ?f    228:25623                                                                                                                                                                                                                                                   control_quantile_mapping_precip_v2.py                                                               000775  000765  000024  00000026431 14016263146 020416  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
control_quantile_mapping_precip_v2.py cyyyymmddhh clead

"""

import os, sys
from datetime import datetime
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pygrib
from quantile_mapping_gamma_mixture_v2_f90 import \
    quantile_mapping_gamma_mixture_v2_f90
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

import _pickle as cPickle
import scipy.stats as stats


# Matplotlib defaults shared by every figure made in this script.
for _rc_key, _rc_value in (('xtick.labelsize', 'medium'),
                           ('ytick.labelsize', 'medium'),
                           ('legend.fontsize', 'large')):
    rcParams[_rc_key] = _rc_value

# =====================================================================

def set_domain_boundaries(cdomain):
    """
    Return the hard-coded index bounds (jmin, jmax, imin, imax) for a
    named domain.

    Per the original author's note, the bounds were derived from the
    bounding lat/lon of the 2.5-km blend output grid and are indices
    into the 0.25-degree GEFSv12 reforecast grid that encompass the
    domain.

    cdomain : one of 'conus', 'pr', 'ak'.  Any other value prints a
        message and terminates the program through sys.exit().
    """
    # (domain name, (jmin, jmax, imin, imax))
    domain_table = (
        ('conus', (93, 246, 368, 686)),
        ('pr', (243, 256, 649, 667)),
        ('ak', (19, 161, 201, 967)),
    )
    for domain_name, bounds in domain_table:
        if cdomain == domain_name:
            jmin, jmax, imin, imax = bounds
            return jmin, jmax, imin, imax

    print ('invalid domain.  Exiting.')
    sys.exit()
    
# =====================================================================

def fraczero_possamps(nsamps, precip):
    """
    Split a 1-D precipitation sample into its zero fraction and its
    sorted positive amounts.

    Samples <= 0.0 are counted as zero.  Each remaining sample gets a
    uniform random perturbation in [-0.005, 0.005) so that, as the
    original author notes, discretized data do not produce many tied
    amounts when CDFs are built later.  The perturbed values are then
    sorted in ascending order.

    nsamps : not used by this routine (kept for the callers' signature).
    precip : array of precipitation amounts.

    Returns (fraction_zero, precip_nonzero, nz): the fraction of samples
    that were <= 0.0, the sorted perturbed positive amounts, and how
    many of them there are.
    """
    dry_indices = np.where(precip <= 0.0)
    wet_samples = np.delete(precip, dry_indices)
    nz = len(wet_samples)

    # perturbation of the same magnitude as the data discretization
    jitter = np.random.uniform(low=-0.005, high=0.005, size=nz)
    precip_nonzero = np.sort(wet_samples + jitter)

    ntotal = len(precip)
    fraction_zero = float(ntotal - len(precip_nonzero)) / float(ntotal)
    return fraction_zero, precip_nonzero, nz


# =====================================================================

# --- read grib data on a single level

def read_gribdata(gribfilename, endStep):
    """
    Read the 'tp' (precipitation) message for one forecast step from a
    grib file.

    gribfilename : path of the grib file, opened with pygrib.
    endStep : value matched against the grib message key ``endStep``.

    Returns (istat, precip_realtime, lats_full, lons_full).  istat is 0
    when the message was read, in which case the other three items are
    ``grb.values`` and the two arrays returned by ``grb.latlons()``.
    istat is -1 when the file does not exist or when an IOError,
    ValueError or RuntimeError was raised while reading; the other three
    items are then None.
    """
    istat = -1
    # Defined up front so the return statement is valid on every path
    # (missing file or failed read).
    precip_realtime = None
    lats_full = None
    lons_full = None

    print (gribfilename, endStep)
    if not os.path.exists(gribfilename):
        print ('   read_gribdata: file not found ', gribfilename)
        return istat, precip_realtime, lats_full, lons_full

    fcstfile = None
    try:
        fcstfile = pygrib.open(gribfilename)
        grb = fcstfile.select(shortName='tp',endStep=endStep)[0]
        values = grb.values
        lats, lons = grb.latlons()
        # publish the results only once everything has been read
        precip_realtime, lats_full, lons_full = values, lats, lons
        istat = 0
    except IOError:
        print ('   IOError in read_gribdata reading ', \
            gribfilename, endStep)
    except ValueError:
        print ('   ValueError in read_gribdata reading ', \
            gribfilename, endStep)
    except RuntimeError:
        print ('   RuntimeError in read_gribdata reading ', \
            gribfilename, endStep)
    finally:
        # release the file handle whether or not the read succeeded
        if fcstfile is not None:
            fcstfile.close()
    return istat, precip_realtime, lats_full, lons_full


    
# =====================================================================

# ---- inputs from command line
#      argv[1] = cyyyymmddhh, date/time string; characters 4:6 are the month
#      argv[2] = clead, lead time string used in the file names below

if len(sys.argv) < 3:
    print ('usage: control_quantile_mapping_precip_v2.py cyyyymmddhh clead')
    sys.exit(1)

nstride = 1
cyyyymmddhh = sys.argv[1]
clead = sys.argv[2]
cmonth = cyyyymmddhh[4:6]
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
if not cmonth.isdigit() or not 1 <= int(cmonth) <= 12:
    # without this check a month of '00' would index cmonths[-1] and
    # silently select 'Dec'
    print ('invalid month in cyyyymmddhh = ', cyyyymmddhh, '.  Exiting.')
    sys.exit(1)
imonth = int(cmonth)-1
ccmonth = cmonths[imonth]
cdomain = 'conus'
mswep_directory = '/Volumes/Backup Plus/mswep/'

# ---- pull the 2-D longitude and latitude arrays of the MSWEP grid from
#      a previously generated netCDF file; 360 is subtracted from the
#      longitudes after reading.

infile = ''.join([mswep_directory, '200001_on_ndfd_grid_6hourly.nc'])
nc = Dataset(infile)
lats_mswep = nc.variables['lats'][:,:]
lons_mswep = nc.variables['lons'][:,:] - 360.0
nc.close()
   
    
# ---- read the MSWEP fitted Gamma parameters from cPickle file.
#      The file name is built from the two-digit month (cmonth) and the
#      lead time string.  Objects must be loaded in the order they were
#      dumped, so Dn is read even though it is not used below.

data_directory = '/Volumes/Backup Plus/mswep/'
infile = data_directory + cmonth+'_conus'+\
    '_MSWEP_apcp_gamma_parameters_h'+clead+'.cPick'

print ('reading from ', infile)
# the with-block closes the file even if one of the loads fails
with open(infile, 'rb') as inf:
    weights_mswep = cPickle.load(inf)
    alpha_mswep = cPickle.load(inf)
    beta_mswep = cPickle.load(inf)
    fzero_mswep = cPickle.load(inf)
    Dn = cPickle.load(inf)
    nmixture_mswep = cPickle.load(inf)

# grid dimensions are taken from the 2-D nmixture array
ny_mswep, nx_mswep = np.shape(nmixture_mswep)
ncomponents = 3
print ('ny_mswep, nx_mswep = ',ny_mswep, nx_mswep)

# ---- read the GEFSv12 fitted Gamma parameters from cPickle file.
#      The file name is built from the month abbreviation (ccmonth), the
#      domain name and the lead time string.  Objects must be loaded in
#      the order they were dumped, so the three Dnstat arrays are read
#      even though they are not used below.

cdomain = 'conus'
gefs_directory = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
infile = gefs_directory + ccmonth+'_'+cdomain+\
    '_apcp_gamma_parameters_h' + clead + '.cPick'
print ('reading from ', infile)
# the with-block closes the file even if one of the loads fails
with open(infile, 'rb') as inf:
    weights_gefsv12 = cPickle.load(inf)
    alpha_gefsv12 = cPickle.load(inf)
    beta_gefsv12 = cPickle.load(inf)
    fzero_gefsv12 = cPickle.load(inf)
    Dnstat1a_gefsv12 = cPickle.load(inf)
    Dnstat2a_gefsv12 = cPickle.load(inf)
    Dnstat3a_gefsv12 = cPickle.load(inf)
    nmixture_gefsv12 = cPickle.load(inf)

# ---- 1-D longitudes and latitudes of the GEFSv12 grid: the sub-grid
#      covering the chosen domain and the complete vectors, plus the
#      2-D meshgrid of the sub-grid.

jmin, jmax, imin, imax = set_domain_boundaries(cdomain)

ncfile = ''.join([gefs_directory, ccmonth, '_apcp_h', clead, '.nc'])
nc = Dataset(ncfile)
lats_1d_gefsv12 = nc.variables['lats_fcst'][jmin:jmax]
lons_1d_gefsv12 = nc.variables['lons_fcst'][imin:imax]
lats_1d_gefsv12_full = nc.variables['lats_fcst'][:]
lons_1d_gefsv12_full = nc.variables['lons_fcst'][:]
nc.close()

ny_gefsv12, nx_gefsv12 = len(lats_1d_gefsv12), len(lons_1d_gefsv12)
print ('lons_1d_gefsv12 dtype = ', lons_1d_gefsv12.dtype)
print ('ny_gefsv12, nx_gefsv12 = ', ny_gefsv12, nx_gefsv12)

lons_fcst_2d, lats_fcst_2d = np.meshgrid(lons_1d_gefsv12,lats_1d_gefsv12)

# ---- get the desired 2021 GEFSv12 forecast as grib file downloaded
#      from NOMADS server.  The forecast-hour suffix is zero-padded to
#      three digits, so '06' gives f006 (as before) and '120' gives f120
#      rather than f0120.

input_directory = '/Volumes/Backup Plus/gefsv12/2021/'
infile = input_directory + cyyyymmddhh + \
    '_gec00.t00z.pgrb2s.0p25.f' + clead.zfill(3)

endStep = int(clead)
istat, precip_realtime, lats_full, lons_full = \
    read_gribdata(infile, endStep)
if istat != 0:
    # nothing below can run without the forecast field
    print ('unable to read forecast from ', infile, '.  Exiting.')
    sys.exit(1)

# put the rows in south-to-north order when the file stores them
# north-to-south, so the latitude vector passed to interp is increasing
if lats_full[0,0] > lats_full[-1,0]:
    lats_full = np.flipud(lats_full)
    precip_realtime = np.flipud(precip_realtime)
# same 360-degree shift that is applied to the MSWEP longitudes
lons_full = lons_full - 360.

print (lons_full[0,:])
print (lats_full[:,0])

# ---- interpolate the full forecast field to the MSWEP grid points
#      (order=1 requests bilinear interpolation in basemap's interp)

precip_gefsv12_on_mswep = interp(precip_realtime, \
    lons_full[0,:], lats_full[:,0], \
    lons_mswep, lats_mswep, checkbounds=False, \
    masked=False, order=1)

# ---- report the shapes of the fitted-parameter arrays, then hand
#      everything to the routine imported from
#      quantile_mapping_gamma_mixture_v2_f90 (a Fortran routine
#      according to the original author, used for speed).

for _label, _array in (('weights_mswep', weights_mswep),
                       ('alpha_mswep', alpha_mswep),
                       ('beta_mswep', beta_mswep),
                       ('fzero_mswep', fzero_mswep),
                       ('weights_gefsv12', weights_gefsv12),
                       ('alpha_gefsv12', alpha_gefsv12),
                       ('beta_gefsv12', beta_gefsv12),
                       ('fzero_gefsv12', fzero_gefsv12)):
    print ('np.shape(' + _label + ') = ', np.shape(_array) )

print (quantile_mapping_gamma_mixture_v2_f90.__doc__)
qmapped_precip = np.zeros((ny_mswep, nx_mswep), dtype=np.float64)
ncomponents = 3

# positional arguments, grouped by origin, in the order the routine is
# called with: MSWEP fit and grid, GEFSv12 fit / forecast / grid, sizes.
_mswep_args = (weights_mswep, alpha_mswep, beta_mswep, fzero_mswep,
               lons_mswep, lats_mswep)
_gefsv12_args = (weights_gefsv12, alpha_gefsv12, beta_gefsv12,
                 fzero_gefsv12, precip_gefsv12_on_mswep,
                 lons_1d_gefsv12, lats_1d_gefsv12)
_size_args = (ncomponents, ny_mswep, nx_mswep, ny_gefsv12, nx_gefsv12)
qmapped_precip = quantile_mapping_gamma_mixture_v2_f90(
    *(_mswep_args + _gefsv12_args + _size_args))
        
# ---- plot the quantile-mapped forecast (qmapped_precip) on the MSWEP
#      grid, using a Lambert conformal map with fixed corner points.

m = Basemap(llcrnrlon=233.7234,llcrnrlat=19.229,
    urcrnrlon = 300.95782, urcrnrlat = 54.37279,\
    projection='lcc',lat_1=25.,lat_2=25.,lon_0=265.,\
    resolution ='l',area_thresh=1000.)
# map-projection coordinates of every MSWEP grid point
x, y = m(lons_mswep, lats_mswep)

# contour levels (labelled in mm on the colorbar) and the fill colors
clevs = [0.0,0.2,0.4,0.6,0.8,1,1.5,2,2.5,3,4,5,6,8,10,15,20]
colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']

fig = plt.figure(figsize=(9,6.5))
axloc = [0.02,0.1,0.96,0.84]
ax1 = fig.add_axes(axloc)
# lead time minus 6 h as a string; not referenced again in this section
cleadb = str(int(clead)-6)
title = 'Quantile mapped forecast, IC = '+cyyyymmddhh+' lead = '+clead+' h'
ax1.set_title(title, fontsize=14,color='Black')
# the map is drawn before the colorbar axes are added to the figure
CS2 = m.contourf(x, y, qmapped_precip, clevs,\
    cmap=None, colors=colorst, extend='both')

m.drawcoastlines(linewidth=0.8,color='Gray')
m.drawcountries(linewidth=0.8,color='Gray')
m.drawstates(linewidth=0.8,color='Gray')

# ---- add a dedicated axes along the bottom of the figure and draw a
#      horizontal colorbar in it.

cax = fig.add_axes([0.06,0.07,0.88,0.02])
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.ax.tick_params(labelsize=7)
cb.set_label('Precipitation (mm)',fontsize=9)

# ---- build the output file name (plot_title holds a file name, not a
#      figure title) and save the figure as a 300-dpi PNG.

plot_title = 'qmapped_precip_'+cyyyymmddhh+'_lead'+clead+'.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')






# ---- plot the GEFS control forecast amount after interpolation to the
#      MSWEP grid (precip_gefsv12_on_mswep), on the same fixed-corner
#      Lambert conformal map used for the quantile-mapped plot.

m = Basemap(llcrnrlon=233.7234,llcrnrlat=19.229,
    urcrnrlon = 300.95782, urcrnrlat = 54.37279,\
    projection='lcc',lat_1=25.,lat_2=25.,lon_0=265.,\
    resolution ='l',area_thresh=1000.)
# map-projection coordinates of every MSWEP grid point
x, y = m(lons_mswep, lats_mswep)

# ---- fill colors and contour levels for the forecast amount

colorst = ['White','#E4FFFF','#C4E8FF','#8FB3FF','#D8F9D8',\
    '#A6ECA6','#42F742','Yellow','Gold','Orange','#FCD5D9','#F6A3AE',\
    '#FA5257','Orchid','#AD8ADB','#A449FF','LightGray']

clevs = [0.0,0.2,0.4,0.6,0.8,1,1.5,2,2.5,3,4,5,6,8,10,15,20]
fig = plt.figure(figsize=(9,6.5))
axloc = [0.02,0.1,0.96,0.84]
ax1 = fig.add_axes(axloc)
# lead time minus 6 h as a string; not referenced again in this section
cleadb = str(int(clead)-6)
title = 'Control forecast precipitation amount (mm) for '+clead+\
    '-h IC = '+cyyyymmddhh
ax1.set_title(title, fontsize=14,color='Black')
# the map is drawn before the colorbar axes are added to the figure
CS2 = m.contourf(x, y, precip_gefsv12_on_mswep, clevs,\
    cmap=None, colors=colorst, extend='both')

m.drawcoastlines(linewidth=0.8,color='Gray')
m.drawcountries(linewidth=0.8,color='Gray')
m.drawstates(linewidth=0.8,color='Gray')

# ---- add a dedicated axes along the bottom of the figure and draw a
#      horizontal colorbar in it.

cax = fig.add_axes([0.06,0.07,0.88,0.02])
cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
    drawedges=True,ticks=clevs,format='%g')
cb.ax.tick_params(labelsize=7)
cb.set_label('Mean precipitation (mm)',fontsize=9)

# ---- build the output file name (plot_title holds a file name, not a
#      figure title) and save the figure as a 300-dpi PNG.

plot_title = 'forecast_precip_'+clead+'_h_IC'+cyyyymmddhh+'.png'
fig.savefig(plot_title, dpi=300)
print ('saving plot to file = ',plot_title)
print ('Done!')


                                                                                                                                                                                                                                       ._control_reforecast_2netcdf.py                                                                     000775  000765  000024  00000000411 14016263146 017055  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        	                                      ATTR      	                             com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    Af    2:300                                                                                                                                                                                                                                                       control_reforecast_2netcdf.py                                                                       000775  000765  000024  00000003101 14016263146 016637  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ 
control_reforecast_2netcdf.py

control the execution of the production of netCDF files for 
precipitation from grib files """

from reforecast_2netcdf import reforecast_2netcdf
from dateutils import daterange, dateshift, dayofyear, splitdate
import sys

# Lead time in hours, taken from the command line as a string (enter 06
# for 6).  Original author's note: do every 3 hours between 03 and 240.
clead = sys.argv[1]
ilead = int(clead)
# Last day of each month as a two-character string, for non-leap and
# leap years; indexed by imonth-1.
cdayend_noleap = ['31','28','31','30','31','30',  '31','31','30','31','30','31']
cdayend_leap = ['31','29','31','30','31','30',  '31','31','30','31','30','31']
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
outfiledir = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
# One output file per (year, month); the call that produces the file is
# the statement that follows the last print of the inner loop.
for iyear in range(2000,2020):
    cyear = str(iyear)
    # Only January is processed as written; the commented-out loop
    # covers all twelve months.
    #for imonth in range(1,13):
    for imonth in range(1,2):
        
        # two-digit month for the date strings
        if imonth < 10:
            cmonth = '0'+str(imonth)
        else:
            cmonth = str(imonth)
        # cdaystart is not referenced again in the lines below
        cdaystart = '01'
        # Divisible-by-4 leap test: correct for every year in
        # 2000-2019, but not for century years such as 2100.
        if iyear%4 == 0:
            cdayend = cdayend_leap[imonth-1]
        else:
            cdayend = cdayend_noleap[imonth-1]
        # first and last day of the month at hour 00, as yyyymmddhh
        cyyyymmddhh_start = cyear+cmonth+'0100'
        cyyyymmddhh_end = cyear+cmonth+cdayend+'00'
        # cmonth is reused: from here on it holds the month abbreviation
        # that goes into the message and the output file name
        cmonth = cmonths[imonth-1]
        print ('***** processing ', cyear, cmonth, ' lead = '+clead )
        # presumably the dates from start to end in 24-h steps --
        # verify against dateutils.daterange
        date_list = daterange(cyyyymmddhh_start,cyyyymmddhh_end,24)
        outfilename = outfiledir + cmonth+cyear+'_h'+clead+'.nc'
        print ('   start, end dates: ',cyyyymmddhh_start, cyyyymmddhh_end)
        print ('   writing to ', outfilename)
        istat = reforecast_2netcdf(date_list, ilead, outfilename)                                                                                                                                                                                                                                                                                                                                                                                                                                                               ._control_reforecast_2netcdf_240h.py                                                                000775  000765  000024  00000000407 14016263146 017617  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    Vh    400                                                                                                                                                                                                                                                         control_reforecast_2netcdf_240h.py                                                                  000775  000765  000024  00000002723 14016263146 017405  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        """ 
control_reforecast_2netcdf_240h.py

control the execution of the production of netCDF files for 
precipitation from grib files """

from reforecast_2netcdf_240h import reforecast_2netcdf_240h
from dateutils import daterange, dateshift, dayofyear, splitdate
import sys

# Last calendar day of every month as two-character strings, for ordinary
# years and for leap years.
cdayend_noleap = ['31','28','31','30','31','30',  '31','31','30','31','30','31']
cdayend_leap = ['31','29','31','30','31','30',  '31','31','30','31','30','31']
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
#outfiledir = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
outfiledir = '/Volumes/NBM/conus_gefsv12/precip/netcdf/'

# One output file per calendar month; each file gets the daily 00 UTC dates
# of that month from every year 2000-2019.
for imonth in range(1,13):  #range(2,3):

    # Start every month with an empty list.  Building the list once outside
    # this loop would make e.g. the February file also contain all of the
    # January dates.
    date_list = []
    cmonth = str(imonth).zfill(2)
    cdaystart = '01'
    for iyear in range(2000, 2020):
        cyear = str(iyear)
        # full Gregorian rule, so the script stays right if the year range
        # is ever extended past a non-leap century year.
        is_leap = (iyear%4 == 0 and iyear%100 != 0) or iyear%400 == 0
        if is_leap:
            cdayend = cdayend_leap[imonth-1]
        else:
            cdayend = cdayend_noleap[imonth-1]
        cyyyymmddhh_start = cyear+cmonth+cdaystart+'00'
        cyyyymmddhh_end = cyear+cmonth+cdayend+'00'
        # daterange is called with a 24-hour increment: one date per day.
        date_list_thisyear = daterange(cyyyymmddhh_start, cyyyymmddhh_end, 24)
        date_list.extend(date_list_thisyear)

    outfilename = outfiledir + cmonths[imonth-1] +'_apcp_mean_10day.nc'
    print ('   start, end dates: ',date_list[0], date_list[-1])
    print ('   writing to ', outfilename)
    istat = reforecast_2netcdf_240h(date_list, outfilename)
    
                                             ._control_reforecast_2netcdf_6h.py                                                                  000775  000765  000024  00000000412 14016263146 017453  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    i    27:270                                                                                                                                                                                                                                                      control_reforecast_2netcdf_6h.py                                                                    000775  000765  000024  00000003622 14016263146 017244  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ 
control_reforecast_2netcdf_6h.py

control the execution of the production of netCDF files for 
precipitation from grib files """

from reforecast_2netcdf_6h import reforecast_2netcdf_6h
from dateutils import daterange, dateshift, dayofyear, splitdate
import sys

#clead = sys.argv[1] # Enter lead time in hours.   Enter 06 for 6
#   # Do every 6 hours between 06 and 246.

# Last calendar day of every month as two-character strings, for ordinary
# years and for leap years.
cdayend_noleap = ['31','28','31','30','31','30',  '31','31','30','31','30','31']
cdayend_leap = ['31','29','31','30','31','30',  '31','31','30','31','30','31']
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
#outfiledir = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'
outfiledir = '/Volumes/NBM/conus_gefsv12/precip/netcdf/'

# Loop over lead times 6, 12, ..., 240 h and over calendar months; one
# output file is produced for every (lead, month) pair.
for ilead in range(6,246,6):
    # three-character, zero-padded lead time used in the file name
    clead = str(ilead).zfill(3)
    for imonth in range(1,13):  #range(2,3):

        # Start every (lead, month) pair with an empty list.  Building the
        # list once at the top of the script would make each file also
        # contain the dates of all months and leads processed before it.
        date_list = []
        cmonth = str(imonth).zfill(2)
        cdaystart = '01'
        for iyear in range(2000, 2020):
            cyear = str(iyear)
            # full Gregorian rule, so the script stays right if the year
            # range is ever extended past a non-leap century year.
            is_leap = (iyear%4 == 0 and iyear%100 != 0) or iyear%400 == 0
            if is_leap:
                cdayend = cdayend_leap[imonth-1]
            else:
                cdayend = cdayend_noleap[imonth-1]
            cyyyymmddhh_start = cyear+cmonth+cdaystart+'00'
            cyyyymmddhh_end = cyear+cmonth+cdayend+'00'
            # daterange is called with a 24-hour increment: one date per day.
            date_list_thisyear = daterange(cyyyymmddhh_start, cyyyymmddhh_end, 24)
            #print ('iyear, date_list_thisyear ', iyear, date_list_thisyear )
            date_list.extend(date_list_thisyear)

        outfilename = outfiledir + cmonths[imonth-1] + '_apcp_h'+clead+'.nc'
        print ('   start, end dates: ',date_list[0], date_list[-1])
        print ('   writing to ', outfilename)
        istat = reforecast_2netcdf_6h(date_list, ilead, outfilename)                                                                                                              ._control_reforecast_2netcdf_v2.py                                                                  000775  000765  000024  00000000412 14016263146 017465  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    ٥i    26:140                                                                                                                                                                                                                                                      control_reforecast_2netcdf_v2.py                                                                    000775  000765  000024  00000003104 14016263146 017251  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ 
control_reforecast_2netcdf_v2.py

control the execution of the production of netCDF files for 
precipitation from grib files """

from reforecast_2netcdf_v2 import reforecast_2netcdf_v2
from dateutils import daterange, dateshift, dayofyear, splitdate
import sys

clead = sys.argv[1] # Enter lead time in hours.   Enter 06 for 6
   # Do every 3 hours between 03 and 240.
ilead = int(clead)
# Last calendar day of every month as two-character strings, for ordinary
# years and for leap years.
cdayend_noleap = ['31','28','31','30','31','30',  '31','31','30','31','30','31']
cdayend_leap = ['31','29','31','30','31','30',  '31','31','30','31','30','31']
cmonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
outfiledir = '/Volumes/Backup Plus/gefsv12/precip/netcdf/'

# Only January is processed as written (range(1,2)); widen the range to
# process more months.
for imonth in range(1,2):

    # Start every month with an empty list so that a file never picks up
    # the dates of a previously processed month.
    date_list = []
    cmonth = str(imonth).zfill(2)
    cdaystart = '01'
    for iyear in range(2000, 2020):
        cyear = str(iyear)
        # full Gregorian rule, so the script stays right if the year range
        # is ever extended past a non-leap century year.
        is_leap = (iyear%4 == 0 and iyear%100 != 0) or iyear%400 == 0
        if is_leap:
            cdayend = cdayend_leap[imonth-1]
        else:
            cdayend = cdayend_noleap[imonth-1]
        cyyyymmddhh_start = cyear+cmonth+cdaystart+'00'
        cyyyymmddhh_end = cyear+cmonth+cdayend+'00'
        # daterange is called with a 24-hour increment: one date per day.
        date_list_thisyear = daterange(cyyyymmddhh_start, cyyyymmddhh_end, 24)
        print ('iyear, date_list_thisyear ', iyear, date_list_thisyear )
        date_list.extend(date_list_thisyear)

    # The file name is built from the month abbreviation and the lead time
    # exactly as typed on the command line.
    outfilename = outfiledir + cmonths[imonth-1] + '_apcp_h'+clead+'.nc'
    print ('   start, end dates: ',date_list[0], date_list[-1])
    print ('   writing to ', outfilename)
    istat = reforecast_2netcdf_v2(date_list, ilead, outfilename)                                                                                                                                                                                                                                                                                                                                                                                                                                                            ._control_soilq.py                                                                                  000775  000765  000024  00000000412 14016263146 014443  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    j    17:790                                                                                                                                                                                                                                                      control_soilq.py                                                                                    000775  000765  000024  00000002135 14016263146 014232  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         python determine_mean_soil_moisture.py 1999 73.0 17.0 84.0 28.0 NIndia
# Each command runs determine_mean_soil_moisture.py once with six arguments:
# a year, four numbers and a region label.
# NOTE(review): the four numbers look like a lon/lat bounding box
# (west, south, east, north) - confirm against determine_mean_soil_moisture.py.
python determine_mean_soil_moisture.py 2003 73.0 17.0 84.0 28.0 NIndia
python determine_mean_soil_moisture.py 2007 73.0 17.0 84.0 28.0 NIndia
python determine_mean_soil_moisture.py 2011 73.0 17.0 84.0 28.0 NIndia
python determine_mean_soil_moisture.py 2015 73.0 17.0 84.0 28.0 NIndia

# Region label NEq_Africa, same five years.
python determine_mean_soil_moisture.py 1999 0.0 5.0 30.0 12.0 NEq_Africa
python determine_mean_soil_moisture.py 2003 0.0 5.0 30.0 12.0 NEq_Africa
python determine_mean_soil_moisture.py 2007 0.0 5.0 30.0 12.0 NEq_Africa
python determine_mean_soil_moisture.py 2011 0.0 5.0 30.0 12.0 NEq_Africa
python determine_mean_soil_moisture.py 2015 0.0 5.0 30.0 12.0 NEq_Africa
 
# Region label Great_Plains, same five years.
python determine_mean_soil_moisture.py 1999 255.0 25.0 265.0 40.0 Great_Plains
python determine_mean_soil_moisture.py 2003 255.0 25.0 265.0 40.0 Great_Plains
python determine_mean_soil_moisture.py 2007 255.0 25.0 265.0 40.0 Great_Plains
python determine_mean_soil_moisture.py 2011 255.0 25.0 265.0 40.0 Great_Plains
python determine_mean_soil_moisture.py 2015 255.0 25.0 265.0 40.0 Great_Plains                                                                                                                                                                                                                                                                                                                                                                                                                                   ._create_alphanumeric_grade.py                                                                      000775  000765  000024  00000000260 14016263147 016713  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    tm                                                                                                                                                                                                                                                                                                                                                    create_alphanumeric_grade.py                                                                        000775  000765  000024  00000000660 14016263147 016502  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        # create_alphanumeric_grade
#    for a number grade 1-5, assign a letter grade A-F

import string  # include string library for the split function.

def number_to_letter(numgrade):
    """Return the letter grade for an integer number grade from 0 to 5.

    5 maps to 'A', 4 to 'B', 3 to 'C', 2 to 'D', and 1 or 0 to 'F'.

    Raises ValueError when numgrade lies outside 0-5.  Without this check a
    grade above 5 would produce a negative list index and silently wrap
    around to an unrelated letter (6 -> 'F', 11 -> 'A').
    """
    lettergrade = ['A','B','C','D','F','F']
    if not 0 <= numgrade <= 5:
        raise ValueError("number grade must be between 0 and 5, got %r" % (numgrade,))
    return lettergrade[5-numgrade]


def main():
    """Prompt for a number grade and print the matching letter grade."""
    # single-argument print calls behave the same under Python 2 and 3
    print("This program converts a number to letter grade")
    print("")

    # Get the number grade; int() is needed because input() returns text
    # under Python 3.
    numgrade = int(input("Please enter the number grade: "))
    print("The letter grade is: %s" % number_to_letter(numgrade))

main()
                                                                                ._create_bias_correlation_matrix.py                                                                 000775  000765  000024  00000000412 14016263147 020003  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    En    70:230                                                                                                                                                                                                                                                      create_bias_correlation_matrix.py                                                                   000775  000765  000024  00000014332 14016263147 017574  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ create bias correlation matrix
"""
import pygrib
import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.signal as signal
import scipy.stats as stats
from calculate_terrain_gradients_f90 import calculate_terrain_gradients_f90

# plot-wide font sizes for tick labels and legends
rcParams.update({
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
    'legend.fontsize': 'large',
})

# =====================================================================

def find_nearest(vec, value):
    """Return the index of the element of vec that is closest to value.

    vec must support array arithmetic (vec - value).  When several elements
    are equally close, the index of the first one is returned.
    """
    deviation = np.abs(vec-value)
    return deviation.argmin()
    
# =====================================================================    

# --- length scales used to normalize the distance measures in the
#     correlation model

hlocal = 10.         # horizontal scale, measured in grid points
vlocal = 300.        # vertical (terrain height) scale, measured in meters
#tlocal = 10.0
lsmask_local = 0.05  # scale applied to the land-sea mask difference


# --- read the land-sea mask for the ERA5 grid over CONUS, together with
#     the latitudes and longitudes of the grid.

infile = 'lsmask.grib'
grbfile = pygrib.open(infile)
grb = grbfile.select()[0] # shortName = lsm
lats, lons = grb.latlons()
lsmask = grb.values
grbfile.close()
ny, nx = np.shape(lsmask)

# reverse the row order of every field; longitudes are also shifted by
# -360 degrees.
# NOTE(review): the flip presumably makes latitude increase with row
# index - confirm against the grib file.
lats = np.flipud(lats)
lons = np.flipud(lons-360)
lsmask = np.flipud(lsmask)

# 1-d coordinate vectors taken from the first column / first row
lats_1d = lats[:,0]
lons_1d = lons[0,:]


# --- read the terrain height on the same grid, flipped the same way.

infile = 'terrain_height.grib'
grbfile = pygrib.open(infile)
grb = grbfile.select()[0] # shortName = orog
terrain_height = np.flipud(grb.values)
print ('max, min terrain_height = ', np.max(terrain_height), np.min(terrain_height))
grbfile.close()

# --- calculate terrain height gradients

#earth_radius_meters = 6378100.
#print (calculate_terrain_gradients_f90.__doc__)
#terrain_gradx, terrain_grady = calculate_terrain_gradients_f90(\
#    earth_radius_meters, terrain_height, lats, ny, nx)
#txmax = np.max(terrain_gradx)
#txmin = np.min(terrain_gradx)
#tymax = np.max(terrain_grady)
#tymin = np.min(terrain_grady)
#print ('max terrain_gradx, min terrain_gradx = ', txmax, txmin)
#print ('max terrain_grady, min terrain_grady = ', tymax, tymin)
#txfactor = np.max([txmax, -1.*txmin])
#tyfactor = np.max([tymax, -1.*tymin])
#terrain_gradx = terrain_gradx / txfactor
#terrain_grady = terrain_grady / tyfactor
    
# --- develop a model of correlations as a function of horizontal distance,
#     land-sea mask, terrain_height difference, and terrain gradient difference

# Set already = True to reload a previously saved correlation array
# instead of recomputing it.
already = False
cfile = 'correlation_bias_ERA5grid.cPick'
if not already:

    # correlation_bias[jy1,ix1,jy2,ix2] holds the modeled bias correlation
    # between grid point (jy1,ix1) and grid point (jy2,ix2).
    correlation_bias = np.zeros((ny, nx, ny, nx), dtype=np.float64)

    # row and column index of every grid point, so that the distances from
    # one reference point to all other points come from array arithmetic
    # rather than from two further nested Python loops.
    jy_grid, ix_grid = np.meshgrid(np.arange(ny), np.arange(nx), indexing='ij')

    for ix1 in range(nx):
        print ('processing ',ix1,' of ', nx)
        for jy1 in range(ny):

            # each factor is a difference divided by its length scale
            hdist_factor = np.sqrt( (ix1-ix_grid)**2 + (jy1-jy_grid)**2) / hlocal
            vdist_factor = np.abs(terrain_height[jy1,ix1] - terrain_height) / vlocal
            # NOTE(review): the mask difference is squared here and squared
            # again inside dist - confirm that this is intended.
            lsmask_factor = np.abs(lsmask[jy1,ix1]-lsmask)**2 / lsmask_local
            dist = np.sqrt(hdist_factor**2 + vdist_factor**2 + lsmask_factor**2)

            bad = np.isnan(dist)
            if bad.any():
                # report the first offending point (scanning ix2, then jy2)
                # and stop; the terrain-gradient factor is not part of the
                # model, so it is not printed.
                ix2, jy2 = np.argwhere(bad.T)[0]
                print ('improper distance at ix1,jy1,ix2,jy2 = ', ix1,jy1,ix2,jy2)
                print ('hdist_factor, vdist_factor, lsmask_factor = ', \
                    hdist_factor[jy2,ix2], vdist_factor[jy2,ix2], lsmask_factor[jy2,ix2])
                sys.exit()

            # Gaussian-shaped decay with distance, scaled by 0.03; the
            # correlation of a point with itself is exactly 1.
            correlation_bias[jy1,ix1,:,:] = 0.03*np.exp(-dist**2)
            correlation_bias[jy1,ix1,jy1,ix1] = 1.0

    # --- save correlation of bias to file

    with open(cfile, 'wb') as ouf:
        cPickle.dump(correlation_bias, ouf)

else:

    # --- load previously saved correlation of bias from file

    with open(cfile, 'rb') as inf:
        correlation_bias = cPickle.load(inf)
    

# ---- make some sample plots

# ---- make some sample plots: for each (lon, lat) below, map the modeled
#      bias correlation between the nearest grid point and every grid point.

plot_lons = [-105.0, -123.0, -105.5, -100.0, -90.0, -122.5, -126.0]
plot_lats = [40.0, 45.0, 40.0, 40.0, 35.0, 45.0, 45.0 ]

for rlon, rlat in zip(plot_lons, plot_lats):

    # grid indices closest to the requested location
    ilon = find_nearest(lons_1d, rlon)
    ilat = find_nearest(lats_1d, rlat)
    clon = str(rlon)
    clat = str(rlat)
    bias_error_corr_map = correlation_bias[:,:,ilat,ilon]
    print ('max, min bias_error_corr_map ',\
        np.max(bias_error_corr_map), np.min(bias_error_corr_map))

    # --- now plot the single-point correlation model for the selected point

    # NOTE(review): 12 contour levels with extend='both' give 13 filled
    # bands, but only 11 colors are listed - confirm the intended mapping.
    clevs = [-0.99,-0.9,-0.7,-0.5,-0.3,-0.1,0.1,0.3,0.5,0.7,0.9,0.99]
    colorst = ['#0000ff', '#6666ff', '#b2b2ff', '#ccccff','#e6e6ff', \
        'White', '#ffe6e6', '#ffcccc', '#ffb2b2', '#ff7373', '#ff0000']

    fig = plt.figure(figsize=(6.,4.2))
    axloc = [0.02,0.09,0.96,0.82]
    ax1 = fig.add_axes(axloc)
    title = r'Estimated T$_{2m}$ bias error correlation map, '+ \
        ' lon = '+clon+', lat = '+clat
    ax1.set_title(title, fontsize=11,color='Black')
    m = Basemap(llcrnrlon=lons[0,0],llcrnrlat=lats[0,0],\
        urcrnrlon=lons[-1,-1],urcrnrlat=lats[-1,-1],\
        resolution='l', projection='mill')
    x, y = m(lons, lats)
    CS2 = m.contourf(x,y,bias_error_corr_map,clevs,\
        cmap=None,colors=colorst,extend='both')
    # mark the reference location with a dot
    xdot, ydot = m(rlon,rlat)
    m.plot(xdot,ydot,marker='.',markersize=5,color='Black')
    m.drawcoastlines(linewidth=0.8,color='Gray')
    m.drawcountries(linewidth=0.8,color='Gray')
    m.drawstates(linewidth=0.4,color='Gray')

    # ---- use axes_grid toolkit to make colorbar axes.

    divider = make_axes_locatable(ax1)
    cax = divider.append_axes("bottom", size="3%", pad=0.1)
    cb = plt.colorbar(CS2,orientation='horizontal',cax=cax,\
        drawedges=True,ticks=clevs,format='%g')
    cb.set_label('Temperature bias correlation')

    # ---- save the plot; the file name encodes the reference lon/lat.
    #      savefig has no fontsize argument, so none is passed.

    plot_title = 't2m_bias_correlation'+'_lon'+clon+'_lat'+clat+'.png'
    fig.savefig(plot_title, dpi=300)
    print ('saving plot to file = ',plot_title)

    # release the figure so that figures do not pile up across iterations
    plt.close(fig)

print ('Done!')



                                                                                                                                                                                                                                                                                                      ._create_conserved_timeseries.py                                                                    000775  000765  000024  00000000425 14016263147 017325  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR            !                       com.apple.lastuseddate#PS         
  com.macromates.selectionRange           com.macromates.visibleIndex  `    xn    123:12-123:662916                                                                                                                                                                                                                                           create_conserved_timeseries.py                                                                      000775  000765  000024  00000013036 14016263147 017112  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """ create_conserved_timeseries.py """
from dateutils import daterange, datetohrs, dayofyear
import sys
import os
import os.path
import numpy as np
from os import path
import numpy.ma as ma
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
from netCDF4 import Dataset

def set_Lwater(temp):
    """Return the latent heat of evaporation of water as a function of temp.

    The value is a cubic polynomial in temp and works for scalars as well
    as for arrays.  The calling script passes temperature in degrees C and
    treats the result as J/gm.
    """
    linear_term = 2.36*temp
    quadratic_term = 0.016*temp**2
    cubic_term = 0.00006*temp**3
    return 2500.8 - linear_term + quadratic_term - cubic_term

# Daily 06 UTC dates for 2000-2019, built in five four-year pieces.  Each
# piece is named after the directory (1999, 2003, ...) its files are read
# from further below.
date_list_1999 = daterange('2000010106','2003123106', 24)
date_list_2003 = daterange('2004010106','2007123106', 24)
date_list_2007 = daterange('2008010106','2011123106', 24)
date_list_2011 = daterange('2012010106','2015123106', 24)
date_list_2015 = daterange('2016010106','2019123106', 24)
date_list = date_list_1999 + date_list_2003 + \
    date_list_2007 + date_list_2011 + date_list_2015
ndates = len(date_list)

# One value per date; entries are masked when the input file is missing
# or cannot be read.
decimalyear = ma.zeros((ndates), dtype=np.float64)
total_precip = ma.zeros((ndates), dtype=np.float64)
surface_pressure = ma.zeros((ndates), dtype=np.float64)
precipitation_rate = ma.zeros((ndates), dtype=np.float64)
evaporation_rate = ma.zeros((ndates), dtype=np.float64)
hourssince1CE_2000 = datetohrs('2000010100')

# cos(latitude) weights for the area averages.  They are built from the
# first file that is read successfully, not necessarily the first date, so
# a missing first file does not leave them undefined.
coslat = None
coslatsum = None

for idate, date in enumerate(date_list):
    hourssince2000  = datetohrs(date) - hourssince1CE_2000
    decimalyear[idate] = 2000. + hourssince2000 /(24.*365.25)

    # pick the directory that holds the four-year period containing the date
    if decimalyear[idate] < 2004:
        cpath = '/Volumes//Backup Plus/bfg/1999/'
    elif decimalyear[idate] < 2008:
        cpath = '/Volumes//Backup Plus/bfg/2003/'
    elif decimalyear[idate] < 2012:
        cpath = '/Volumes//Backup Plus/bfg/2007/'
    elif decimalyear[idate] < 2016:
        cpath = '/Volumes//Backup Plus/bfg/2011/'
    else:
        cpath = '/Volumes//Backup Plus/bfg/2015/'

    infile = cpath + 'bfg_'+date+'_fhr00_control2.nc4'
    processed = False
    #print (infile, path.exists(infile))
    if path.exists(infile):

        try:
            nc = Dataset(infile)
            try:
                presssfc = nc.variables['pressfc'][0,:,:]
                prate_avesfc = nc.variables['prate_avesfc'][0,:,:]
                lhtfl_avesfc = nc.variables['lhtfl_avesfc'][0,:,:]
                # NOTE(review): column water times 9.81 is presumably the
                # pressure exerted by the precipitable water - confirm the
                # units of pwatacol.
                precipitable_water_pressure = nc.variables['pwatacol'][0,:,:] *9.81

                # NOTE(review): 273.16 is used for the Kelvin -> Celsius
                # shift; the usual offset is 273.15 - confirm.
                temp_2m = nc.variables['tmp2m'][0,:,:] - 273.16
                if coslat is None:
                    lon = nc.variables['lon'][:]
                    lat = nc.variables['lat'][:]
                    lon2d, lat2d = np.meshgrid(lon,lat)
                    coslat = np.cos(lat2d*3.1415926/180.)
                    coslatsum = np.sum(coslat)
            finally:
                # always release the file, also when a read above fails
                nc.close()

            #print ('mean precipitable_water_pressure = ', \
            #    np.sum(precipitable_water_pressure*coslat)/coslatsum)
            # ---- from the 2-meter temperature, estimate the latent heat of evaporation
            #      Lwater calculated from https://en.wikipedia.org/wiki/Latent_heat
            #      as a cubic function of temperature in degrees C.

            Lwater = set_Lwater(temp_2m)
            #Lwater = 2500.*np.ones((ny,nx), dtype=np.float32) # a common approximation

            # Evaporation rate can be calculated from the latent heat flux divided by
            # the latent heat of evaporation of water (Lwater).
            # E = (latent heat flux)/Lwater , where latent heat flux
            # read in from netCDF file.
            #
            # Do the units work out?
            #
            # Numerator's units: evaporative heat flux units: W/m**2
            # 1 W = 1 J/s = 1 Nm/s = 1 kg*m**2/s**3 , so evaporative heat flux
            # units are (kg m**2/s**3)*(1/m**2) = kg/s**3
            #
            # Denominator's units: Lwater as calculated there has units of
            # J/gm = (kg*m**2/s**2)/ gm.  Hence multiply by 1000 gm/kg
            # to get expressed in m**2/s**2 .
            #
            # So, the final units are (kg/s**3) / (m**2/s**2) =
            #   kg/s**3 * s**2/m**2 = kg/(m**2 s)
            #
            # which coincides with the precipitation rate units in the grib table
            # https://www.nco.ncep.noaa.gov/pmb/docs/on388/table2.html
            #

            evap = lhtfl_avesfc / (Lwater*1000.)
            #total_precip[idate] = np.sum(tprcpsfc*coslat) / coslatsum

            # cos(latitude)-weighted averages over the whole grid.  The
            # pressure has the precipitable-water pressure removed and is
            # divided by 100.
            # NOTE(review): the division by 100 is presumably Pa -> hPa, and
            # the factor 2.0 on the precipitation rate is unexplained here -
            # confirm both.
            surface_pressure[idate] = (np.sum(presssfc*coslat) - \
                np.sum(precipitable_water_pressure*coslat)) / (coslatsum*100.)
            precipitation_rate[idate] = 2.0 * np.sum(prate_avesfc*coslat) / coslatsum
            evaporation_rate[idate] = np.sum(evap*coslat) / coslatsum
            processed = True
        except (RuntimeError, OSError) as e:
            # report the failure instead of hiding it; the date is masked below
            print ('   problem reading ', infile, ': ', e)

    if not processed:
        #total_precip[idate] = ma.masked
        surface_pressure[idate] = ma.masked
        precipitation_rate[idate] = ma.masked
        evaporation_rate[idate] = ma.masked
    print (idate,  precipitation_rate[idate], \
        evaporation_rate[idate], surface_pressure[idate])

# ---- save to cPickle file.  The arrays are dumped one after another and
#      must be loaded back in the same order.

outfile = 'precip_budget_and_pressure_reanalysis_timeseries.cPick'
with open(outfile, 'wb') as ouf:
    cPickle.dump(decimalyear, ouf)
    #cPickle.dump(total_precip, ouf)
    cPickle.dump(surface_pressure, ouf)
    cPickle.dump(precipitation_rate, ouf)
    cPickle.dump(evaporation_rate, ouf)
    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      ._create_grade_0to100.py                                                                            000775  000765  000024  00000000260 14016263150 015160  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    ĕo                                                                                                                                                                                                                                                                                                                                                    create_grade_0to100.py                                                                              000775  000765  000024  00000001147 14016263150 014750  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        # create_alphanumeric_grade
#    for a number grade 0-100, assign a letter grade A-F

import string  # include string library for the split function.

def main():
    """Prompt for a numeric grade and print the matching letter grade.

    The grade is integer-divided by 10 and used to index a lookup table:
    90 and above -> 'A', 80s -> 'B', 70s -> 'C', 60s -> 'D', anything
    lower -> 'F'.  The table index is clamped to the table bounds, so
    grades below 10 (which previously indexed past the end of the table)
    give 'F' and grades of 110 or more (which previously wrapped around to
    the end of the table) give 'A'.

    Raises
    ------
    ValueError: if the text entered is not an integer.
    """
    # lookup table indexed by 10 - grade//10
    lettergrade = ['A', 'A', 'B', 'C', 'D']
    lettergrade.extend(5*'F')
    print(lettergrade)
    print("This program converts a number to letter grade")
    print("")

    # Get the number grade to convert.  int() gives an integer whether
    # input() returns text (Python 3) or an evaluated value (Python 2).
    numgrade = int(input("Please enter the number grade: "))
    numgraded10 = numgrade // 10
    # single formatted strings keep the output identical under Python 2
    # and Python 3 (the doubled space matches the original output)
    print("numgrade/10 =  %s" % numgraded10)

    # clamp so out-of-range grades cannot run off either end of the table
    index = min(max(10 - numgraded10, 0), len(lettergrade) - 1)
    print("The letter grade is: %s" % lettergrade[index])


# run only when executed as a script, so importing this file does not prompt
if __name__ == "__main__":
    main()

                                                                                                                                                                                                                                                                                                                                                                                                                         ._crps.py                                                                                           000775  000765  000024  00000000412 14016263150 012516  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    Pp    72:180                                                                                                                                                                                                                                                      crps.py                                                                                             000775  000765  000024  00000006050 14016263150 012305  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def crps (nmembers, ny, nx,  nthresh, thresholds, lats, ensemble, observed):
    """ 
    
    crps:  computes CRPS score from ensemble for one specific case.  
    The routine has been tailored here for precipitation.  Assumes lat-lon
    grid input.
    
    Inputs:
    ------
    nmembers: number of members
    ny: number of grid points in y direction
    nx: number of grid points in x direction
    nthresh
    lats[ny]: latitudes so we can perform cos(latitude) weighting of samples
        to allow for different grid areas
    nthresh: number of thresholds over which to integrate CRPS
    thresholds[nthresh]: values of the precipitation thresholds. 
         Start with nonzero value.
    lats[ny]: in degrees
    ensemble[nmembers, ny, nx]: ensemble precipitation values, assumed mm
    observed[ny, nx]: observed/analyzed precipitation amount    
    
    returns:
    ------- 
    CRPS_domain_average: the area-weighted average CRPS over the whole domain
    CRPS_by_gridpoint: (ny,nx) array of CRPS values for each grid point
    
    """
    
    import numpy as np
    
    # ---- define variables, arrays

    FCDF = np.zeros((nthresh), dtype=np.float64)
    OCDF = np.zeros((nthresh), dtype=np.float64)
    ones = np.ones((nmembers), dtype=np.float64) 
    zeros = np.zeros((nmembers), dtype=np.float64)
    pi = 3.141592654
    CRPS_by_gridpoint = np.zeros((ny,nx), dtype=np.float64)

    # ---- compute CRPS at each grid point + totals needed to compute
    #      area weighted domain average.

    ensemble_sorted = np.sort(ensemble, axis=0)
    #print ('ensemble_sorted[:,0,0] = ', ensemble_sorted[:,0,0])
    #print ('observed[0,0] = ', observed[0,0])
    CRPS_sum = 0.0
    CRPS_sum_coslat = 0.0
    for jy in range(ny):
        coslat = np.cos(2.0*pi*lats[jy]/360.)
        #print ('coslat = ', coslat)
        for ix in range(nx):
            #print ('ithresh, thresh, FCDF, ACDF, CRPS_by_gridpoint = ')
            ensemble_sorted_1d = ensemble_sorted[:,jy,ix]
            for ithresh, thresh in enumerate(thresholds):
                if ithresh == 0:
                    dp = thresholds[0]
                else:
                    dp = thresholds[ithresh] - thresholds[ithresh-1]
                    
                    
                # --- integrate to compute CRPS following Wilks 2011 text,
                #     section 2.5, eq. 8.54a
                    
                a = np.where(thresh < ensemble_sorted_1d, zeros, ones)
                FCDF = np.sum(a) / float(nmembers)
                if thresh < observed[jy,ix]  :
                    ACDF = 0.0
                else:
                    ACDF = 1.0
                CRPS_by_gridpoint[jy,ix] = CRPS_by_gridpoint[jy,ix] + dp*(FCDF-ACDF)**2
                #print (ithresh, thresh, FCDF, ACDF, CRPS_by_gridpoint[jy,ix])
                
            CRPS_sum = CRPS_sum + CRPS_by_gridpoint[jy,ix]*coslat
            CRPS_sum_coslat = CRPS_sum_coslat + coslat
            
    CRPS_domain_average = CRPS_sum / CRPS_sum_coslat 
    
    return CRPS_domain_average, CRPS_by_gridpoint
                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        ._ctof.py                                                                                           000775  000765  000024  00000000260 14016263150 012503  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2   ~                                            ATTR                                    com.apple.lastuseddate#PS    `    p                                                                                                                                                                                                                                                                                                                                                    ctof.py                                                                                             000775  000765  000024  00000000232 14016263150 012265  0                                                                                                    ustar 00Tom                             staff                           000000  000000 
                                                                                                                                                                        # convert degrees c to degrees f

def main():
    """Prompt for a temperature in degrees Celsius and print it in Fahrenheit.

    Raises
    ------
    ValueError: if the text entered is not a number.
    """
    # float() gives a number whether input() returns text (Python 3) or an
    # evaluated value (Python 2)
    degc = float(input("Enter temp in degrees C: "))
    degf = 1.8*degc + 32.
    # one formatted string prints the same under Python 2 and Python 3
    # (the doubled space matches the original comma-separated print)
    print("%s  degrees F" % degf)


# run only when executed as a script, so importing this file does not prompt
if __name__ == "__main__":
    main()
                                                                                                                                                                                                                                                                                                                                                                      ._dateutils.py                                                                                      000775  000765  000024  00000000733 14016263150 013553  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                             ATTR        $                    $     com.apple.lastuseddate#PS      4     %com.apple.metadata:kMDItemWhereFroms        com.macromates.selectionRange           com.macromates.visibleIndex  `    Pr    bplist00_kimap://tom%2Ehamill@email.boulder.noaa.gov:993/fetch%3EUID%3E/INBOX%3E123312?part=1.2&filename=dateutils.pyPy                            z144:502870                                     dateutils.py                                                                                        000775  000765  000024  00000010771 14016263150 013341  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         import datetime, calendar

"""
utilities for working with dates using datetime module (Python 2.3 or later)

Jeff Whitaker <jeffrey.s.whitaker@noaa.gov>
"""


# Hour offset added to hour counts measured from the start of the Gregorian
# calendar; 13865688 = 577737 days * 24.
hrsgregstart = 13865688 # hrs from 00010101 to 15821015 in Julian calendar.
# times in many datasets use mixed Gregorian/Julian calendar, datetime 
# module uses a proleptic Gregorian calendar. So, I use datetime to compute
# hours since start of Greg. calendar (15821015) and add this constant to
# get hours since 1-Jan-0001 in the mixed Gregorian/Julian calendar.
# reference date for the mixed-calendar (mixedcal=True) conversions
gregstart = datetime.datetime(1582,10,15) # datetime.datetime instance
# reference date for the proleptic Gregorian (mixedcal=False) conversions
day1 = datetime.datetime(1,1,1) # datetime.datetime instance

def dateto_hrs_since_day1CE(curdate,mixedcal=True):
    """convert a datetime.datetime instance to hours since 1-Jan-0001

    mixedcal=True:  hours elapsed since the start of the Gregorian
        calendar (gregstart) plus the constant hrsgregstart.  Dates
        earlier than gregstart raise ValueError.
    mixedcal=False: hours elapsed since day1 plus one extra day, so
        1-Jan-0001 00:00 maps to 24.
    """
    if not mixedcal:
        elapsed = curdate-day1
        return 24.*(elapsed.days+1)+elapsed.seconds/3600.
    if curdate < gregstart:
        raise ValueError('date must be after start of gregorian calendar (15821015)!')
    elapsed = curdate-gregstart
    return 24*elapsed.days + elapsed.seconds/3600 + hrsgregstart

def hrs_since_day1CE_todate(hrs,mixedcal=True):
    """return datetime.datetime instance given hours since 1-Jan-0001

    Inverse of dateto_hrs_since_day1CE for the same mixedcal setting.

    mixedcal=True:  hrs is offset by hrsgregstart and added to gregstart.
    mixedcal=False: hrs is counted from day1 with the same one-day offset
        that dateto_hrs_since_day1CE applies, so 24 maps to 1-Jan-0001 00:00.

    Raises ValueError if hrs is negative, or (mixedcal=False) if hrs is
    less than 24 and therefore precedes 1-Jan-0001.
    """
    if hrs < 0.0:
        msg = "hrs must be positive!"
        raise ValueError(msg)
    delta = datetime.timedelta(hours=1)
    if mixedcal:
        hrs_sincegreg = hrs - hrsgregstart
        curdate = gregstart + hrs_sincegreg*delta
    else:
        # The forward conversion returns 24*(days+1), so remove that extra
        # day here.  Previously this branch returned the bare timedelta
        # hrs*delta, which is not a date and has no .year/.month/.day.
        if hrs < 24:
            msg = "hrs must be at least 24 when mixedcal is False!"
            raise ValueError(msg)
        curdate = day1 + (hrs - 24)*delta
    return curdate

def dateshift(analdate,fcsthr):
    """
 verifdate = dateshift(analdate, fcsthr)

 shift an analysis date string (yyyymmddhh) by fcsthr hours and return
 the resulting verification date as a yyyymmddhh string.
    """
    year,month,day,hour = splitdate(analdate)
    start = datetime.datetime(year,month,day,hour)
    shifted = start + fcsthr*datetime.timedelta(hours=1)
    return makedate(shifted.year,shifted.month,shifted.day,shifted.hour)


def splitdate(yyyymmddhh):
    """
 yyyy,mm,dd,hh = splitdate(yyyymmddhh)

 break a date string (yyyymmddhh) into the integers yyyy,mm,dd,hh
 (characters 0-3, 4-5, 6-7 and 8-9 respectively).
    """
    field_bounds = ((0,4),(4,6),(6,8),(8,10))
    return tuple([int(yyyymmddhh[lo:hi]) for lo,hi in field_bounds])

def makedate(yyyy,mm,dd,hh):
    """
 yyyymmddhh = makedate(yyyy,mm,dd,hh)

 build a date string of the form yyyymmddhh from the integers
 yyyy,mm,dd,hh; the year is zero-padded to four digits and the other
 fields to two.
    """
    return '%0.4i%0.2i%0.2i%0.2i' % (yyyy,mm,dd,hh)

def hrstodate(hrs,mixedcal=True):
    """
 yyyymmddhh = hrstodate(hrs)

 convert hrs since day 1 CE to a date string of the form yyyymmddhh.
 mixedcal is passed through to hrs_since_day1CE_todate.
    """
    when = hrs_since_day1CE_todate(hrs,mixedcal=mixedcal)
    fields = (when.year,when.month,when.day,when.hour)
    return makedate(*fields)

def datetohrs(yyyymmddhh,mixedcal=True):
    """
 hrs = datetohrs(yyyymmddhh)

 convert a date string of the form yyyymmddhh to hrs since day 1 CE.
 mixedcal is passed through to dateto_hrs_since_day1CE.
    """
    stamp = datetime.datetime(*splitdate(yyyymmddhh))
    return dateto_hrs_since_day1CE(stamp,mixedcal=mixedcal)

def daterange(date1,date2,hrinc):
    """
 date_list = daterange(date1,date2,hrinc)

 return a list of date strings of the form yyyymmddhh given
 a starting date, ending date and an increment in hours.

 The list always starts with date1.  Dates are added until one is reached
 that is not earlier than date2 (compared as strings), so the last entry
 overshoots date2 when the span is not a whole multiple of hrinc.

 Raises ValueError if hrinc is not positive while date1 is earlier than
 date2, since the loop could then never reach date2.
    """
    if date1 < date2 and hrinc <= 0:
        raise ValueError('hrinc must be positive when date1 is before date2')
    date = date1
    delta = datetime.timedelta(hours=1)
    yyyy,mm,dd,hh = splitdate(date)
    d = datetime.datetime(yyyy,mm,dd,hh)
    dates = [date]
    while date < date2:
       d = d + hrinc*delta
       date = makedate(d.year,d.month,d.day,d.hour)
       dates.append(date)
    return dates

def dayofyear(yyyy,mm,dd):
    """
 return integer day of year given yyyy,mm,dd

 the count is the number of days elapsed since 1 January of that year,
 so 1 January itself returns 0.
    """
    return (datetime.datetime(yyyy,mm,dd) - datetime.datetime(yyyy,1,1)).days

def getyrmon(day_of_year,yyyy=2001):
    """
 return month,day for a day of the year (1 = 1 January) in year yyyy.

 raises ValueError if day_of_year exceeds the length of the year (366 in
 a leap year, otherwise 365).  Values below 1 are not checked.
    """
    jan1 = datetime.datetime(yyyy,1,1)
    days_in_year = 366 if calendar.isleap(jan1.year) else 365
    if day_of_year > days_in_year:
        raise ValueError('not that many days in the year')
    target = jan1 + (day_of_year-1)*datetime.timedelta(days=1)
    return target.month,target.day

def daysinmonth(yyyy,mm):
    """
 return the number of days in month mm of year yyyy
    """
    _, ndays = calendar.monthrange(yyyy,mm)
    return ndays

# Quick manual check when the module is run as a script: print a few sample
# conversions (leap-day day-of-year, days in February 2000, and hour counts
# with and without the mixed Gregorian/Julian calendar).
if __name__ == "__main__":
    print (dayofyear(2000,2,29))
    print (daysinmonth(2000,2))
    print (datetohrs('0001010100',mixedcal=False))
    print (datetohrs('2001010100',mixedcal=False))
    print (datetohrs('2001010100',mixedcal=True))
       ._decayavg_biascorrection2.py                                                                       000775  000765  000024  00000000412 14016263150 016502  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    r    31:870                                                                                                                                                                                                                                                      decayavg_biascorrection2.py                                                                         000775  000765  000024  00000002367 14016263150 016300  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         
def decayavg_biascorrection2(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, lsmask, \
    date_list_forecast, alpha):
    
    """ apply decaying average bias correction 
        to forecasts.  
        
        For each date the masked forecast-minus-analysis difference is
        blended into the previous date's bias estimate:
        
            beta[0] = alpha*obsinc[0]
            beta[t] = (1-alpha)*beta[t-1] + alpha*obsinc[t]
        
        Inputs:
        ------
        npts: not used by this routine
        nlats, nlons: grid dimensions used for the work array
        forecast_3d, analyses_3d: indexed [idate, :, :]
        beta_3d: indexed [idate, :, :]; overwritten in place
        lsmask: 2-D multiplier applied to the forecast-minus-analysis field
        date_list_forecast: one entry per date; only its length and order
            are used here
        alpha: weight given to the newest increment
        
        returns:
        -------
        beta_3d: the same array that was passed in, now filled
    """
    
    import numpy as np
    
    # ---- sequentially loop through dates during the sample, 
    #      updating the previous day's bias correction 
    #      to the new days fcst vs. obs discrepancy.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast):

        # ---- calculate the "observation" increment (term in parentheses
        #      in eq. 37 in Dee paper)
        
        obsinc_2d[:,:] = lsmask[:,:]*(forecast_3d[idate,:,:] - analyses_3d[idate,:,:])
        
        if idate == 0:
            # no previous estimate to decay on the first date
            beta_3d[idate,:,:] = alpha*obsinc_2d[:,:]
        else:
            beta_3d[idate,:,:] = (1.-alpha)*beta_3d[idate-1,:,:] + \
                alpha*obsinc_2d[:,:]

    return beta_3d
def decayavg_biascorrection_meantemp(npts, nlats, nlons, \
    forecast_3d, analyses_3d, beta_3d, date_list_forecast, alpha):
    
    """ apply modified decaying average bias correction 
        to forecasts.  Use the mean of all locations with temperatures 
        less than 1.5C different
        
        For every grid point the increment is the mean forecast-minus-
        analysis difference over all grid points whose forecast value on
        that date is within 1.5 of the point's own forecast.  The increments
        are then blended as
        
            beta[0] = alpha*obsinc[0]
            beta[t] = (1-alpha)*beta[t-1] + alpha*obsinc[t]
        
        Inputs:
        ------
        npts: not used by this routine
        nlats, nlons: grid dimensions looped over
        forecast_3d, analyses_3d: indexed [idate, :, :]
        beta_3d: indexed [idate, :, :]; overwritten in place
        date_list_forecast: one entry per date; printed as progress
        alpha: weight given to the newest increment
        
        returns:
        -------
        beta_3d: the same array that was passed in, now filled
    """
    
    import numpy as np
    
    # ---- sequentially loop through dates during the sample, 
    #      updating the previous day's bias correction 
    #      to the new days fcst vs. obs discrepancy.
    
    obsinc_2d = np.zeros((nlats, nlons), dtype=np.float64)
    for idate, date in enumerate(date_list_forecast):

        # ---- calculate a bias correction using the mean difference
        #      of F-O's for the points that have similar forecast temperatures.
        
        print ('processing date = ', date)
        forecast_2d = forecast_3d[idate,:,:] 
        fminuso_2d = forecast_2d - analyses_3d[idate,:,:]
        for i in range(nlons):
            for j in range(nlats):
                similar = np.abs(forecast_2d - forecast_2d[j,i]) < 1.5
                # Average over the similar points only.  The earlier code
                # divided the masked sums by the total number of grid
                # points, which shrank the increment toward zero whenever
                # only part of the grid was similar.
                obsinc_2d[j,i] = np.mean(fminuso_2d[similar])
        
        if idate == 0:
            # no previous estimate to decay on the first date
            beta_3d[idate,:,:] = alpha*obsinc_2d[:,:]
        else:
            beta_3d[idate,:,:] = (1.-alpha)*beta_3d[idate-1,:,:] + alpha*obsinc_2d[:,:]

    return beta_3d
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    |Pt    81:241352                                                                                                                                                                                                                                                   decayavg_filter_conus_B.py                                                                          000775  000765  000024  00000005204 14016263151 016137  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """

"""
import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.signal as signal
import scipy.stats as stats
from astropy.convolution import convolve


rcParams['xtick.labelsize']='medium'
rcParams['ytick.labelsize']='medium'
rcParams['legend.fontsize']='large'

# =====================================================================
# Build the (npts x npts) matrix  cov_bias + 1.5*cov_random  from the
# localized 4-D covariances, invert it, and save the inverse as a 4-D
# array indexed [j1,i1,j2,i2].
#
# usage: the first command-line argument is the lead-time string that
#        appears in the input and output file names.
# =====================================================================

clead = sys.argv[1]
                
# ---- read covariance matrices from cPickle file.  The objects must be
#      loaded in the order they were dumped, so the unused ones are still
#      read.  NOTE: unpickling can execute arbitrary code; only read files
#      produced by this workflow.

infile = 'covstats_bias_random_ecmwf2019_localized_lead='+clead+'.cPick'
with open(infile,'rb') as inf:
    cov_bias_localized = cPickle.load(inf) 
    cov_random_localized = cPickle.load(inf) 
    var_bias = cPickle.load(inf) 
    var_random = cPickle.load(inf) 
    lats = cPickle.load(inf) 
    lons = cPickle.load(inf) 
nlats, nlons = np.shape(lats)

# --- make the sum of the random and the bias into a 2D-array.  Then invert.
#     Row/column k of the 2D array corresponds to grid point (j,i) with
#     k = i*nlats + j, i.e. longitude varies slowest.  Reshaping B_and_R to
#     (nlons,nlats,nlons,nlats) gives a view indexed [i1,j1,i2,j2], which is
#     filled from the [j1,i1,j2,i2]-ordered covariances by swapping axes.

npts = nlats*nlons
B_and_R = np.zeros((npts, npts), dtype=np.float64)
B_and_R_4D = B_and_R.reshape(nlons, nlats, nlons, nlats)
B_and_R_4D[...] = np.transpose(cov_bias_localized, (1,0,3,2))
B_and_R_4D += 1.5*np.transpose(cov_random_localized, (1,0,3,2))
        
#for ktr1 in range(npts):
#    B_and_R[ktr1,ktr1] = B_and_R[ktr1,ktr1] + 1.0

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Inverting matrix. Current time = ',current_time)
B_and_R_inverse = np.linalg.inv(B_and_R) 
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Finished inverting matrix. Current time = ',current_time)

# ---- reform matrix inverse into 4D-array indexed [j1,i1,j2,i2]
#      (the reverse of the axis swap used above).

matrix_inverse_4D = np.ascontiguousarray(np.transpose( \
    B_and_R_inverse.reshape(nlons, nlats, nlons, nlats), (1,0,3,2)))
        
# ---- save to cPickle file.

outfile = 'B_and_R_matrix_inverse_lead='+clead+'.cPick'
with open(outfile, 'wb') as ouf:
    cPickle.dump(matrix_inverse_4D, ouf)

                                                                                                                                                                                                                                                                                                                                                                                                ._decayavg_filter_conus_B_2018.py                                                                   000775  000765  000024  00000000415 14016263151 017025  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    Pt    88:151457                                                                                                                                                                                                                                                   decayavg_filter_conus_B_2018.py                                                                     000775  000765  000024  00000005473 14016263151 016621  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
decayavg_filter_conus_B_2018.py
"""
import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.signal as signal
import scipy.stats as stats
from astropy.convolution import convolve


rcParams['xtick.labelsize']='medium'
rcParams['ytick.labelsize']='medium'
rcParams['legend.fontsize']='large'

# =====================================================================
# Build the (npts x npts) matrix  cov_bias + 1.5*cov_random  from the
# localized 4-D covariances (2018 statistics), invert it, and save the
# inverse as a 4-D array indexed [j1,i1,j2,i2].
#
# usage: the first command-line argument is the lead-time string that
#        appears in the input and output file names.
# =====================================================================

clead = sys.argv[1]
                
# ---- read covariance matrices from cPickle file.  The objects must be
#      loaded in the order they were dumped, so the unused ones are still
#      read.  NOTE: unpickling can execute arbitrary code; only read files
#      produced by this workflow.

#infile = 'covstats_bias_random_ecmwf2019_localized_lead='+clead+'.cPick'
infile = 'covstats_bias_random_ecmwf2018_localized_lead='+clead+'.cPick'
with open(infile,'rb') as inf:
    cov_bias_localized = cPickle.load(inf) 
    cov_random_localized = cPickle.load(inf) 
    var_bias = cPickle.load(inf) 
    var_random = cPickle.load(inf) 
    lats = cPickle.load(inf) 
    lons = cPickle.load(inf) 
nlats, nlons = np.shape(lats)

# --- make the sum of the random and the bias into a 2D-array.  Then invert.
#     Row/column k of the 2D array corresponds to grid point (j,i) with
#     k = i*nlats + j, i.e. longitude varies slowest.  Reshaping B_and_R to
#     (nlons,nlats,nlons,nlats) gives a view indexed [i1,j1,i2,j2], which is
#     filled from the [j1,i1,j2,i2]-ordered covariances by swapping axes.

npts = nlats*nlons
B_and_R = np.zeros((npts, npts), dtype=np.float64)
B_and_R_4D = B_and_R.reshape(nlons, nlats, nlons, nlats)
B_and_R_4D[...] = np.transpose(cov_bias_localized, (1,0,3,2))
B_and_R_4D += 1.5*np.transpose(cov_random_localized, (1,0,3,2))
        
#for ktr1 in range(npts):
#    B_and_R[ktr1,ktr1] = B_and_R[ktr1,ktr1] + 1.0

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Inverting matrix. Current time = ',current_time)
B_and_R_inverse = np.linalg.inv(B_and_R) 
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print ('Finished inverting matrix. Current time = ',current_time)

# ---- reform matrix inverse into 4D-array indexed [j1,i1,j2,i2]
#      (the reverse of the axis swap used above).

matrix_inverse_4D = np.ascontiguousarray(np.transpose( \
    B_and_R_inverse.reshape(nlons, nlats, nlons, nlats), (1,0,3,2)))
        
# ---- save to cPickle file.

#outfile = 'B_and_R_matrix_inverse_lead='+clead+'.cPick'
outfile = 'B_and_R_matrix_inverse_2018_lead='+clead+'.cPick'
print (outfile)
with open(outfile, 'wb') as ouf:
    cPickle.dump(matrix_inverse_4D, ouf)

                                                                                                                                                                                                         ._define_important_casedates.py                                                                     000775  000765  000024  00000000417 14024437035 017123  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS         
  com.macromates.selectionRange           com.macromates.visibleIndex  `    {v    25-30:26+20                                                                                                                                                                                                                                                 define_important_casedates.py                                                                       000775  000765  000024  00000030326 14024437035 016710  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # python define_important_casedates.py warm/cold ncases cexclude
import _pickle as cPickle
import sys, os
import numpy as np
import math
from matplotlib.path import Path
from shapely.geometry import Polygon

from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import shapefile

# ---- command-line arguments:
#        1) season label (warm or cool)
#        2) total number of reforecast cases to select
#        3) HUC-exclusion switch; '1' means exclude HUCs 9, 13, and 16

cseason, ctotal_ncases, cexclude_hucs = sys.argv[1], sys.argv[2], sys.argv[3]

# --- per feedback from Sunghee, provide ability to save data with or
#     without excluding HUCs.  huc_use holds one 0/1 flag per HUC number
#     1..19 (the 19th entry is always 1), and cexclude is the tag used to
#     label the chosen option.

excluded_hucs = (9, 13, 16) if cexclude_hucs == '1' else ()
huc_use = [0 if hucnum in excluded_hucs else 1 for hucnum in range(1, 20)]
cexclude = '_no9_13_16' if cexclude_hucs == '1' else '_allhucs'

total_ncases_out = int(ctotal_ncases)
print ('total_ncases_out = ', total_ncases_out)
print ('cexclude_hucs = ',cexclude_hucs)
print ('cexclude = ', cexclude)
print ('np.sum(huc_use) = ', np.sum(huc_use))

# ---- set the parameters for how to divide up cases, some selected by the
#      average mean precip over the HUC; others by the precip max at the top
#      20 grid points within the HUC, and the rest by CONUS-averaged mean precip

nhucs = 19 # 18 HUC-2's + whole conus [#19]

# number of HUC-2 regions actually in use; the -1 removes the CONUS entry
# of huc_use, which is set to 1 in both the "exclude" and "all HUCs" lists.
nhucs_used = int(np.sum(huc_use)) - 1

# 30% of the cases are split evenly among the active HUCs by HUC mean
fraction_by_each_huc_bymean = 0.3 / float(nhucs_used)
# 60% of the cases (excl. CONUS) are split evenly among the active HUCs
# by HUC max at top 20 points within that HUC
fraction_by_each_huc_bymax20 = 0.6 / float(nhucs_used)
# whatever fraction is left is assigned by CONUS mean
fraction_conuswide = 1.0 - \
    fraction_by_each_huc_bymean*nhucs_used - \
    fraction_by_each_huc_bymax20*nhucs_used
print ('fractions bymean bymax20 conus = ', \
    fraction_by_each_huc_bymean, fraction_by_each_huc_bymax20,\
    fraction_conuswide)

# per-HUC quotas are truncated to whole cases
cases_for_each_huc_bymean = int(total_ncases_out*fraction_by_each_huc_bymean)
cases_for_each_huc_bymax20 = int(total_ncases_out*fraction_by_each_huc_bymax20)

# The CONUS quota absorbs the remainder.  It must be computed with the number
# of HUCs actually in use: subtracting (nhucs-1) quotas when some HUCs are
# excluded over-counts the per-HUC cases and can make this quota negative,
# which silently drops every CONUS case.
cases_for_conus = total_ncases_out - \
    nhucs_used*(cases_for_each_huc_bymean + cases_for_each_huc_bymax20)
print ('cases_for_each_huc_bymean, cases_for_each_huc_bymax20, cases_for_conus = ',\
     cases_for_each_huc_bymean, cases_for_each_huc_bymax20, cases_for_conus)
print ('total number of cases = ',\
    nhucs_used*cases_for_each_huc_bymean + \
    nhucs_used*cases_for_each_huc_bymax20 + \
    cases_for_conus)

# ---- do some preprocessing, loading in the data and getting the indices associated
#      with sorted data.  The last HUC is for CONUS.

for ihuc in range(19):
    
    # --- read the yyyymmddhh of the initial date, the mean precip in this HUC over the 
    #     240 h period, and the mean of the precip at the grid points with 20 largest 
    #     values, followed by the mean lon and lat stored with those max20 values.
    #     The five objects are read back in the order listed.  The context
    #     manager closes the file even if one of the loads fails.
    
    infile = cseason + '_precip_stats_huc2number'+str(ihuc+1)+'.cPick'
    print ('reading from ', infile)
    with open(infile, 'rb') as inf:
        yyyymmddhh = cPickle.load(inf)
        mean_precip = cPickle.load(inf)
        max20_precip = cPickle.load(inf)
        max20_meanlon = cPickle.load(inf)
        max20_meanlat = cPickle.load(inf)
    
    # ---- Apply argsort to get the associated indices of sorted low to high values.
    #      Initialize arrays also; the number of dates is taken from the first
    #      file, so every later file must supply the same number of dates.
        
    argsort_mean = np.argsort(mean_precip)
    argsort_max20 = np.argsort(max20_precip)
    if ihuc == 0: # first time through
        ndates = len(yyyymmddhh)
        argsort_mean_allhucs = np.zeros((19,ndates), dtype=np.int32)
        argsort_max20_allhucs = np.zeros((19,ndates), dtype=np.int32)
        weighting_mean_allhucs = np.zeros((19,ndates), dtype=np.float64)
        weighting_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
        lon_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
        lat_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
    argsort_mean_allhucs[ihuc,:] = argsort_mean[:]
    argsort_max20_allhucs[ihuc,:] = argsort_max20[:]
    
    # ---- Develop an initial weighting estimate based on the precipitation
    #      value divided by the maximum precipitation value (the last entry
    #      of the low-to-high argsort), so the largest weight in each row is 1.
    #      NOTE(review): a HUC whose maximum is zero would divide by zero here.
    
    weighting_mean_allhucs[ihuc,:] = mean_precip[:] / mean_precip[argsort_mean[-1]]
    weighting_max20_allhucs[ihuc,:] = max20_precip[:] / max20_precip[argsort_max20[-1]]
    lon_max20_allhucs[ihuc,:] = max20_meanlon[:]
    lat_max20_allhucs[ihuc,:] = max20_meanlat[:]

# ---- Pass 1: choose cases HUC by HUC from the ***ensemble mean*** weights.
#      Only the 18 real HUCs are visited here; the full CONUS (stored in the
#      last HUC index) is processed later.

print ('******** beginning loop thru cases')
casedates = np.zeros((ndates), dtype=np.int32) # set to 1 for every chosen date
which_huc = np.zeros((ndates), dtype=np.int16) # 1-based HUC number that chose the date
meanlon = -99.99*np.ones((ndates), dtype=np.float32) # used to ID center lon when selected by max20
meanlat = -99.99*np.ones((ndates), dtype=np.float32) # used to ID center lat when selected by max20
casenum = 0
for icase in range(cases_for_each_huc_bymean):
    print ('processing cases by mean', icase, cases_for_each_huc_bymean)
    for ihuc in range(18): # 18, so excl. CONUS

        if huc_use[ihuc] != 1:
            continue # this HUC was excluded on the command line

        # ---- the date with the largest remaining mean-precip weight is the
        #      last entry of the low-to-high argsort.

        weights_mean = weighting_mean_allhucs[ihuc,:]
        indices = np.argsort(weights_mean)
        ipick = indices[-1]

        # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
        #      would fall outside the record the chosen date itself is used.

        iminus = ipick-1 if ipick != 0 else ipick
        iplus = ipick+1 if ipick != ndates-1 else ipick
        iminus2 = ipick-2 if ipick > 1 else ipick
        iplus2 = ipick+2 if ipick < ndates-2 else ipick

        casedates[ipick] = 1 # choose the date with max weight
        which_huc[ipick] = ihuc+1

        # NOTE(review): mean_precip still holds whatever the preprocessing
        # loop read last (the file for HUC number 19), so the precip printed
        # for HUC 18 is presumably not that HUC's own value -- TODO confirm.
        if ihuc == 17:
            print ('casedate, huc, precip, wt = ',\
                yyyymmddhh[ipick], which_huc[ipick], \
                mean_precip[ipick], weights_mean[ipick])

        # ---- for every HUC row of both weight tables: zero the chosen date
        #      so it is not chosen again, and de-emphasize the +/-1 dates so
        #      that chosen cases do not cluster around a few dates.

        for wts in (weighting_mean_allhucs, weighting_max20_allhucs):
            wts[:,ipick] = 0.0
            wts[:,iminus] *= 0.7
            wts[:,iplus] *= 0.7

        # ---- milder de-emphasis of the +/-2 dates.  This is applied to the
        #      max20 weights only in this pass.
        #      NOTE(review): confirm the mean weights are meant to be skipped.

        weighting_max20_allhucs[:,iminus2] *= 0.85
        weighting_max20_allhucs[:,iplus2] *= 0.85
        casenum = casenum + 1
    
# ---- Pass 2: choose cases HUC by HUC from the ***ensemble max20*** weights.
#      Only the 18 real HUCs are visited here; the full CONUS (stored in the
#      last HUC index) is processed later.

print ('******** beginning loop thru cases')
for icase in range(cases_for_each_huc_bymax20):
    print ('processing cases by max20', icase, cases_for_each_huc_bymax20)
    for ihuc in range(18):

        if huc_use[ihuc] != 1:
            continue # this HUC was excluded on the command line

        # ---- the date with the largest remaining max20 weight is the last
        #      entry of the low-to-high argsort.

        weights = weighting_max20_allhucs[ihuc,:]
        indices = np.argsort(weights)
        ipick = indices[-1]

        # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
        #      would fall outside the record the chosen date itself is used.

        iminus = ipick-1 if ipick != 0 else ipick
        iplus = ipick+1 if ipick != ndates-1 else ipick
        iminus2 = ipick-2 if ipick > 1 else ipick
        iplus2 = ipick+2 if ipick < ndates-2 else ipick

        # ---- flag the date and remember which HUC chose it, together with
        #      the stored mean lon/lat of that HUC's max20 points.

        casedates[ipick] = 1
        which_huc[ipick] = ihuc+1
        meanlon[ipick] = lon_max20_allhucs[ihuc,ipick]
        meanlat[ipick] = lat_max20_allhucs[ihuc,ipick]

        # NOTE(review): max20_precip still holds whatever the preprocessing
        # loop read last (the file for HUC number 19), so the value printed
        # for HUC 18 is presumably not that HUC's own value -- TODO confirm.
        if ihuc == 17:
            print ('casedate, huc, wt = ',yyyymmddhh[ipick], \
                which_huc[ipick], max20_precip[ipick], weights[ipick])

        # ---- for every HUC row of both weight tables: zero the chosen date
        #      so it is not chosen again, and de-emphasize the +/-1 dates.

        for wts in (weighting_mean_allhucs, weighting_max20_allhucs):
            wts[:,ipick] = 0.0
            wts[:,iminus] *= 0.7
            wts[:,iplus] *= 0.7

        # ---- milder de-emphasis of the +/-2 dates.  This is applied to the
        #      max20 weights only in this pass.
        #      NOTE(review): confirm the mean weights are meant to be skipped.

        weighting_max20_allhucs[:,iminus2] *= 0.85
        weighting_max20_allhucs[:,iplus2] *= 0.85
        casenum = casenum + 1
        

# ---- Pass 3: fill the CONUS quota from the CONUS-wide mean-precip weights
#      (row 18), using dates that haven't been selected already.

print ('******** cases_for_conus',cases_for_conus)
for icase in range(cases_for_conus):

    # ---- the date with the largest remaining CONUS weight is the last
    #      entry of the low-to-high argsort.

    weights_mean = weighting_mean_allhucs[18,:]
    indices = np.argsort(weights_mean)
    ipick = indices[-1]

    # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
    #      would fall outside the record the chosen date itself is used.

    iminus = ipick-1 if ipick != 0 else ipick
    iplus = ipick+1 if ipick != ndates-1 else ipick
    iminus2 = ipick-2 if ipick > 1 else ipick
    iplus2 = ipick+2 if ipick < ndates-2 else ipick

    casedates[ipick] = 1 # choose the date with max weight
    which_huc[ipick] = 19 # 19 marks a CONUS-selected case
    print ('casedate, huc, precip, wt = ',yyyymmddhh[ipick], \
        which_huc[ipick], mean_precip[ipick], weights_mean[ipick])

    # ---- zero out this date's weight so it is not chosen again, then
    #      de-emphasize the surrounding dates (0.7 at +/-1, 0.85 at +/-2) so
    #      that chosen cases do not cluster around a few dates.  Only the
    #      mean-precip weights are touched in this pass.

    weighting_mean_allhucs[:,ipick] = 0.0
    for ineighbor, factor in ((iminus, 0.7), (iplus, 0.7), \
            (iminus2, 0.85), (iplus2, 0.85)):
        weighting_mean_allhucs[:,ineighbor] *= factor

# ---- save list of chosen cases.  Also save lon/lat of mean of max20 grid points.
#      One text line is written per chosen date: the date, the HUC number
#      that chose it, and the mean lon and lat (left at -99.99 unless the
#      date was chosen by the max20 criterion).  The context manager makes
#      sure the file is flushed and closed even if a write fails; any later
#      explicit close() of the same handle is then a harmless no-op.

outfile = 'case_list_'+cseason+'season_ncases'+ctotal_ncases+cexclude+'.txt'
print ('writing case dates to ', outfile)
with open(outfile,'w') as ouf:
    print ('casedates[0:30] = ', casedates[0:30])
    print ('yyyymmddhh[0:30] = ', yyyymmddhh[0:30])
    print ('which_huc[0:30] = ', which_huc[0:30])
    for idate in range(ndates):
        if casedates[idate] == 1:
            print(yyyymmddhh[idate], which_huc[idate], meanlon[idate], meanlat[idate], file=ouf)
ouf.close()                                                                                                                                                                                                                                                                                                          ._define_important_casedates_roebber.py                                                             000755  000765  000024  00000000413 14073701025 020612  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    {v    1122541                                                                                                                                                                                                                                                     define_important_casedates_roebber.py                                                               000755  000765  000024  00000030431 14073701025 020400  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         # python define_important_casedates_roebber.py warm/cool ncases cexclude
import _pickle as cPickle
import sys, os
import numpy as np
import math
from matplotlib.path import Path
from shapely.geometry import Polygon

from netCDF4 import Dataset
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable
import shapefile

# ---- command-line arguments:
#        1) season label (warm or cool)
#        2) total number of reforecast cases to select
#        3) HUC-exclusion switch; '1' means exclude HUCs 9, 13, and 16

cseason, ctotal_ncases, cexclude_hucs = sys.argv[1], sys.argv[2], sys.argv[3]

# --- per feedback from Sunghee, provide ability to save data with or
#     without excluding HUCs.  huc_use holds one 0/1 flag per HUC number
#     1..19 (the 19th entry is always 1), and cexclude is the tag used to
#     label the chosen option.

excluded_hucs = (9, 13, 16) if cexclude_hucs == '1' else ()
huc_use = [0 if hucnum in excluded_hucs else 1 for hucnum in range(1, 20)]
cexclude = '_no9_13_16' if cexclude_hucs == '1' else '_allhucs'

total_ncases_out = int(ctotal_ncases)
print ('total_ncases_out = ', total_ncases_out)
print ('cexclude_hucs = ',cexclude_hucs)
print ('cexclude = ', cexclude)
print ('np.sum(huc_use) = ', np.sum(huc_use))

# ---- set the parameters for how to divide up cases, some selected by the
#      average mean precip over the HUC; others by the precip max at the top
#      20 grid points within the HUC, and the rest by CONUS-averaged mean precip

nhucs = 19 # 18 HUC-2's + whole conus [#19]

# number of HUC-2 regions actually in use; the -1 removes the CONUS entry
# of huc_use, which is set to 1 in both the "exclude" and "all HUCs" lists.
nhucs_used = int(np.sum(huc_use)) - 1

# 30% of the cases are split evenly among the active HUCs by HUC mean
fraction_by_each_huc_bymean = 0.3 / float(nhucs_used)
# 60% of the cases (excl. CONUS) are split evenly among the active HUCs
# by HUC max at top 20 points within that HUC
fraction_by_each_huc_bymax20 = 0.6 / float(nhucs_used)
# whatever fraction is left is assigned by CONUS mean
fraction_conuswide = 1.0 - \
    fraction_by_each_huc_bymean*nhucs_used - \
    fraction_by_each_huc_bymax20*nhucs_used
print ('fractions bymean bymax20 conus = ', \
    fraction_by_each_huc_bymean, fraction_by_each_huc_bymax20,\
    fraction_conuswide)

# per-HUC quotas are truncated to whole cases
cases_for_each_huc_bymean = int(total_ncases_out*fraction_by_each_huc_bymean)
cases_for_each_huc_bymax20 = int(total_ncases_out*fraction_by_each_huc_bymax20)

# The CONUS quota absorbs the remainder.  It must be computed with the number
# of HUCs actually in use: subtracting (nhucs-1) quotas when some HUCs are
# excluded over-counts the per-HUC cases and can make this quota negative,
# which silently drops every CONUS case.
cases_for_conus = total_ncases_out - \
    nhucs_used*(cases_for_each_huc_bymean + cases_for_each_huc_bymax20)
print ('cases_for_each_huc_bymean, cases_for_each_huc_bymax20, cases_for_conus = ',\
     cases_for_each_huc_bymean, cases_for_each_huc_bymax20, cases_for_conus)
print ('total number of cases = ',\
    nhucs_used*cases_for_each_huc_bymean + \
    nhucs_used*cases_for_each_huc_bymax20 + \
    cases_for_conus)

# ---- do some preprocessing, loading in the data and getting the indices associated
#      with sorted data.  The last HUC is for CONUS.

for ihuc in range(19):
    
    # --- read the yyyymmddhh of the initial date, the mean precip in this HUC over the 
    #     240 h period, and the mean of the precip at the grid points with 20 largest 
    #     values, followed by the mean lon and lat stored with those max20 values.
    #     The five objects are read back in the order listed.  The context
    #     manager closes the file even if one of the loads fails.
    
    infile = cseason + '_precip_stats_roebber_huc2number'+str(ihuc+1)+'.cPick'
    print ('reading from ', infile)
    with open(infile, 'rb') as inf:
        yyyymmddhh = cPickle.load(inf)
        print (yyyymmddhh[0], yyyymmddhh[-1]) # first and last date in the file
        mean_precip = cPickle.load(inf)
        max20_precip = cPickle.load(inf)
        max20_meanlon = cPickle.load(inf)
        max20_meanlat = cPickle.load(inf)
    
    # ---- Apply argsort to get the associated indices of sorted low to high values.
    #      Initialize arrays also; the number of dates is taken from the first
    #      file, so every later file must supply the same number of dates.
        
    argsort_mean = np.argsort(mean_precip)
    argsort_max20 = np.argsort(max20_precip)
    if ihuc == 0: # first time through
        ndates = len(yyyymmddhh)
        argsort_mean_allhucs = np.zeros((19,ndates), dtype=np.int32)
        argsort_max20_allhucs = np.zeros((19,ndates), dtype=np.int32)
        weighting_mean_allhucs = np.zeros((19,ndates), dtype=np.float64)
        weighting_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
        lon_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
        lat_max20_allhucs = np.zeros((19,ndates), dtype=np.float64)
    argsort_mean_allhucs[ihuc,:] = argsort_mean[:]
    argsort_max20_allhucs[ihuc,:] = argsort_max20[:]
    
    # ---- Develop an initial weighting estimate based on the precipitation
    #      value divided by the maximum precipitation value (the last entry
    #      of the low-to-high argsort), so the largest weight in each row is 1.
    #      NOTE(review): a HUC whose maximum is zero would divide by zero here.
    
    weighting_mean_allhucs[ihuc,:] = mean_precip[:] / mean_precip[argsort_mean[-1]]
    weighting_max20_allhucs[ihuc,:] = max20_precip[:] / max20_precip[argsort_max20[-1]]
    lon_max20_allhucs[ihuc,:] = max20_meanlon[:]
    lat_max20_allhucs[ihuc,:] = max20_meanlat[:]


# ---- Pass 1: choose cases HUC by HUC from the ***ensemble mean*** weights.
#      Only the 18 real HUCs are visited here; the full CONUS (stored in the
#      last HUC index) is processed later.

print ('******** beginning loop thru cases')
casedates = np.zeros((ndates), dtype=np.int32) # set to 1 for every chosen date
which_huc = np.zeros((ndates), dtype=np.int16) # 1-based HUC number that chose the date
meanlon = -99.99*np.ones((ndates), dtype=np.float32) # used to ID center lon when selected by max20
meanlat = -99.99*np.ones((ndates), dtype=np.float32) # used to ID center lat when selected by max20
casenum = 0
for icase in range(cases_for_each_huc_bymean):
    print ('processing cases by mean', icase, cases_for_each_huc_bymean)
    for ihuc in range(18): # 18, so excl. CONUS

        if huc_use[ihuc] != 1:
            continue # this HUC was excluded on the command line

        # ---- the date with the largest remaining mean-precip weight is the
        #      last entry of the low-to-high argsort.

        weights_mean = weighting_mean_allhucs[ihuc,:]
        indices = np.argsort(weights_mean)
        ipick = indices[-1]

        # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
        #      would fall outside the record the chosen date itself is used.

        iminus = ipick-1 if ipick != 0 else ipick
        iplus = ipick+1 if ipick != ndates-1 else ipick
        iminus2 = ipick-2 if ipick > 1 else ipick
        iplus2 = ipick+2 if ipick < ndates-2 else ipick

        casedates[ipick] = 1 # choose the date with max weight
        which_huc[ipick] = ihuc+1

        # NOTE(review): mean_precip still holds whatever the preprocessing
        # loop read last (the file for HUC number 19), so the precip printed
        # for HUC 18 is presumably not that HUC's own value -- TODO confirm.
        if ihuc == 17:
            print ('casedate, huc, precip, wt = ',\
                yyyymmddhh[ipick], which_huc[ipick], \
                mean_precip[ipick], weights_mean[ipick])

        # ---- for every HUC row of both weight tables: zero the chosen date
        #      so it is not chosen again, and de-emphasize the +/-1 dates so
        #      that chosen cases do not cluster around a few dates.

        for wts in (weighting_mean_allhucs, weighting_max20_allhucs):
            wts[:,ipick] = 0.0
            wts[:,iminus] *= 0.7
            wts[:,iplus] *= 0.7

        # ---- milder de-emphasis of the +/-2 dates.  This is applied to the
        #      max20 weights only in this pass.
        #      NOTE(review): confirm the mean weights are meant to be skipped.

        weighting_max20_allhucs[:,iminus2] *= 0.85
        weighting_max20_allhucs[:,iplus2] *= 0.85
        casenum = casenum + 1
    
# ---- Pass 2: choose cases HUC by HUC from the ***ensemble max20*** weights.
#      Only the 18 real HUCs are visited here; the full CONUS (stored in the
#      last HUC index) is processed later.

print ('******** beginning loop thru cases')
for icase in range(cases_for_each_huc_bymax20):
    print ('processing cases by max20', icase, cases_for_each_huc_bymax20)
    for ihuc in range(18):

        if huc_use[ihuc] != 1:
            continue # this HUC was excluded on the command line

        # ---- the date with the largest remaining max20 weight is the last
        #      entry of the low-to-high argsort.

        weights = weighting_max20_allhucs[ihuc,:]
        indices = np.argsort(weights)
        ipick = indices[-1]

        # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
        #      would fall outside the record the chosen date itself is used.

        iminus = ipick-1 if ipick != 0 else ipick
        iplus = ipick+1 if ipick != ndates-1 else ipick
        iminus2 = ipick-2 if ipick > 1 else ipick
        iplus2 = ipick+2 if ipick < ndates-2 else ipick

        # ---- flag the date and remember which HUC chose it, together with
        #      the stored mean lon/lat of that HUC's max20 points.

        casedates[ipick] = 1
        which_huc[ipick] = ihuc+1
        meanlon[ipick] = lon_max20_allhucs[ihuc,ipick]
        meanlat[ipick] = lat_max20_allhucs[ihuc,ipick]

        # NOTE(review): max20_precip still holds whatever the preprocessing
        # loop read last (the file for HUC number 19), so the value printed
        # for HUC 18 is presumably not that HUC's own value -- TODO confirm.
        if ihuc == 17:
            print ('casedate, huc, wt = ',yyyymmddhh[ipick], \
                which_huc[ipick], max20_precip[ipick], weights[ipick])

        # ---- for every HUC row of both weight tables: zero the chosen date
        #      so it is not chosen again, and de-emphasize the +/-1 dates.

        for wts in (weighting_mean_allhucs, weighting_max20_allhucs):
            wts[:,ipick] = 0.0
            wts[:,iminus] *= 0.7
            wts[:,iplus] *= 0.7

        # ---- milder de-emphasis of the +/-2 dates.  This is applied to the
        #      max20 weights only in this pass.
        #      NOTE(review): confirm the mean weights are meant to be skipped.

        weighting_max20_allhucs[:,iminus2] *= 0.85
        weighting_max20_allhucs[:,iplus2] *= 0.85
        casenum = casenum + 1
        

# ---- Pass 3: fill the CONUS quota from the CONUS-wide mean-precip weights
#      (row 18), using dates that haven't been selected already.

print ('******** cases_for_conus',cases_for_conus)
for icase in range(cases_for_conus):

    # ---- the date with the largest remaining CONUS weight is the last
    #      entry of the low-to-high argsort.

    weights_mean = weighting_mean_allhucs[18,:]
    indices = np.argsort(weights_mean)
    ipick = indices[-1]

    # ---- neighbouring dates at offsets +/-1 and +/-2; when a neighbour
    #      would fall outside the record the chosen date itself is used.

    iminus = ipick-1 if ipick != 0 else ipick
    iplus = ipick+1 if ipick != ndates-1 else ipick
    iminus2 = ipick-2 if ipick > 1 else ipick
    iplus2 = ipick+2 if ipick < ndates-2 else ipick

    casedates[ipick] = 1 # choose the date with max weight
    which_huc[ipick] = 19 # 19 marks a CONUS-selected case
    print ('casedate, huc, precip, wt = ',yyyymmddhh[ipick], \
        which_huc[ipick], mean_precip[ipick], weights_mean[ipick])

    # ---- zero out this date's weight so it is not chosen again, then
    #      de-emphasize the surrounding dates (0.7 at +/-1, 0.85 at +/-2) so
    #      that chosen cases do not cluster around a few dates.  Only the
    #      mean-precip weights are touched in this pass.

    weighting_mean_allhucs[:,ipick] = 0.0
    for ineighbor, factor in ((iminus, 0.7), (iplus, 0.7), \
            (iminus2, 0.85), (iplus2, 0.85)):
        weighting_mean_allhucs[:,ineighbor] *= factor

# ---- save list of chosen cases.  Also save lon/lat of mean of max20 grid points.
#      One text line is written per chosen date: the date, the HUC number
#      that chose it, and the mean lon and lat (left at -99.99 unless the
#      date was chosen by the max20 criterion).  The context manager makes
#      sure the file is flushed and closed even if a write fails; any later
#      explicit close() of the same handle is then a harmless no-op.

outfile = 'case_list_'+cseason+'season_roebber_ncases'+ctotal_ncases+cexclude+'.txt'
print ('writing case dates to ', outfile)
with open(outfile,'w') as ouf:
    print ('casedates[0:30] = ', casedates[0:30])
    print ('yyyymmddhh[0:30] = ', yyyymmddhh[0:30])
    print ('which_huc[0:30] = ', which_huc[0:30])
    for idate in range(ndates):
        if casedates[idate] == 1:
            print(yyyymmddhh[idate], which_huc[idate], meanlon[idate], meanlat[idate], file=ouf)
ouf.close()                                                                                                                                                                                                                                       ._demo_sfcdata.py                                                                                   000775  000765  000024  00000000412 14016263151 014161  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    &w    27:270                                                                                                                                                                                                                                                      demo_sfcdata.py                                                                                     000775  000765  000024  00000014075 14016263151 013756  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
rcParams['legend.fontsize']='x-small'

# ---- experiment configuration

# Julian day number of simulation
idate = range(365)
# systematic error in the ECMWF system
bias = 2.0
# lag-1 autocorrelation of the truth and of the ECMWF error
phi_truth = 0.5
phi_ecmwf = 0.7
# observation error variance, roughly 1/10th climatological
R = 0.15
# scaling factors for the ECMWF random error and for the truth's
# random component as it enters the ECMWF error
random_ecmwf_mag = 0.5
random_truth_mag = 0.7

ndays = len(idate)

# ---- N(0,1) random components; ECMWF first, then truth (draw order matters
#      for reproducibility with a seeded generator)
random_ecmwf = np.random.randn(ndays)
random_truth = np.random.randn(ndays)
print ('random_truth[0:10] = ', random_truth[0:10])

# truth, the constant-zero climatological background, and the
# autocorrelated ECMWF analysis error all start as float32 zero vectors
truth = np.zeros((ndays), dtype=np.float32)
background = np.zeros((ndays), dtype=np.float32)
autocorr_ecmwf = np.zeros((ndays), dtype=np.float32)

# ---- AR(1) recursions (inherently sequential, so this stays a loop)
for i in range(1, ndays):
    shock = random_truth[i]
    # true state is an AR1 time series
    truth[i] = phi_truth*truth[i-1] + shock
    # ECMWF error: AR1 memory + own random error + fraction of truth's shock
    autocorr_ecmwf[i] = phi_ecmwf*autocorr_ecmwf[i-1] + \
        random_ecmwf_mag*random_ecmwf[i] + \
        random_truth_mag*shock

# add the systematic error
autocorr_ecmwf = autocorr_ecmwf + bias
# background error variance of the truth relative to the zero background
B = np.var(truth)
# time mean, used to separate the time-varying ECMWF error from the bias
autocorr_mean_ecmwf = np.mean(autocorr_ecmwf)
# two observation time series with independent errors of variance R
obs1 = np.sqrt(R)*np.random.randn(ndays) + truth
obs2 = np.sqrt(R)*np.random.randn(ndays) + truth

print ('np.var(truth) = ', B)

# ---- serial assimilation of the two observations.  B and R do not change
#      with time, so gains and analysis variances are computed once.
K1 = B / (B+R)          # Kalman gain for first observation
Pa1 = B - K1*B          # analysis error variance after first observation
K2 = Pa1 / (Pa1+R)      # Kalman gain for second observation
Pa2 = Pa1 - K2*Pa1      # analysis error variance after second observation

# state estimates after one and after two observations, stored as float32
state_estimate_assim1 = \
    (background + K1*(obs1-background)).astype(np.float32)
state_estimate_assim2 = (state_estimate_assim1 + \
    K2*(obs2-state_estimate_assim1)).astype(np.float32)
    
# ---- make 5-panel plot: panels (a)-(e), stacked from top to bottom

fig = plt.figure(figsize=(6.5,9.0))

# ECMWF series with its time mean removed, i.e. the time-varying part of the
# ECMWF analysis; used in panels (b), (c) and the diagnostics at the end.
ecmwf_anomaly = autocorr_ecmwf - autocorr_mean_ecmwf


def _start_panel(bottom, title):
    """Add a full-width axes whose lower edge is at figure fraction
    `bottom`, set its title, and return the axes."""
    panel = fig.add_axes([0.11,bottom,0.85,0.14])
    panel.set_title(title, fontsize=11,color='Black')
    return panel


def _join_series(panel, series1, series2):
    """Draw one black vertical segment per day connecting the two series."""
    for i in range(365):
        panel.plot([i,i],[series1[i], series2[i]],color='Black',lw=1.3)


def _finish_panel(panel, ylim):
    """Apply the y label, legend, axis limits and grid shared by all panels."""
    panel.set_ylabel('Temperature')
    panel.legend(loc=0)
    panel.set_xlim(0,365)
    panel.set_ylim(ylim[0],ylim[1])
    panel.grid(True,lw=0.25,color='LightGray')


# (a) all four time series; wider y range because the ECMWF series
#     still contains its bias here
ax = _start_panel(0.83, '(a) Truth, ECMWF 4D-Var, OI analyses')
ax.plot(idate,truth,'r-',lw=0.6,label='Truth')
ax.plot(idate,autocorr_ecmwf,'b-',lw=0.6,label='ECMWF 4D-Var')
ax.plot(idate,state_estimate_assim1,'g-',lw=0.6,label='OI, 1 obs')
ax.plot(idate,state_estimate_assim2,'-',color='Purple',lw=0.6,label='OI, 2 obs')
_finish_panel(ax, (-3,7))

# (b) difference between de-biased ECMWF and the 1-observation OI analysis
ax = _start_panel(0.63, \
    r'(b) $\delta_t^{\prime}$ relative to OI with 1 observation')
ax.plot(idate,truth,'r-',lw=0.6,label='Truth')
ax.plot(idate,ecmwf_anomaly,'b-',\
    lw=0.6,label='ECMWF 4D-Var random component')
ax.plot(idate,state_estimate_assim1,'g-',lw=0.6,label='OI, 1 obs')
_join_series(ax, ecmwf_anomaly, state_estimate_assim1)
_finish_panel(ax, (-5,5))

# (c) as (b), but relative to the 2-observation OI analysis
ax = _start_panel(0.43, \
    r'(c) $\delta_t^{\prime}$ relative to OI with 2 observations')
ax.plot(idate,truth,'r-',lw=0.6,label='Truth')
ax.plot(idate,ecmwf_anomaly,'b-',\
    lw=0.6,label=r'ECMWF 4D-Var random component')
ax.plot(idate,state_estimate_assim2,'-',color='Purple',\
    lw=0.6,label='OI, 2 obs')
_join_series(ax, ecmwf_anomaly, state_estimate_assim2)
_finish_panel(ax, (-5,5))

# (d) actual error of the 1-observation OI analysis relative to truth
ax = _start_panel(0.23, r'(d) OI error with 1 observation')
ax.plot(idate,truth,'r-',lw=0.6,label='Truth')
ax.plot(idate,state_estimate_assim1,'g-',lw=0.6,\
    label='OI, assimilate 1 obs')
_join_series(ax, truth, state_estimate_assim1)
_finish_panel(ax, (-5,5))

# (e) actual error of the 2-observation OI analysis relative to truth.
#     This panel was previously also labelled "(d)".
ax = _start_panel(0.03, r'(e) OI error with 2 observations')
ax.plot(idate,truth,'r-',lw=0.6,label='Truth')
ax.plot(idate,state_estimate_assim2,'-',color='Purple',\
    lw=0.6,label='OI, assimilate 2 obs')
_join_series(ax, truth, state_estimate_assim2)
_finish_panel(ax, (-5,5))
ax.set_xlabel('Day number')

# ---- save the figure

plot_title = 'demo_sfcdata.png'
fig.savefig(plot_title,dpi=400)
print ('saving plot to file = ',plot_title)
print ('Done!')


# ---- diagnostics: sample error variances next to the variances (Pa1, Pa2)
#      predicted by the Kalman update, then variances of the difference
#      between the de-biased ECMWF series and each OI analysis

print ('Var(truth - state_estimate_assim1) = ', \
    np.var(truth - state_estimate_assim1), Pa1)
print ('Var(truth - state_estimate_assim2) = ', \
    np.var(truth - state_estimate_assim2), Pa2)
print ('Var(autocorr_ecmwf -  state_estimate_assim1) = ', \
    np.var(ecmwf_anomaly -  state_estimate_assim1))
print ('Var(autocorr_ecmwf -  state_estimate_assim2) = ', \
    np.var(ecmwf_anomaly -  state_estimate_assim2))
        
    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                       ._det_params.py                                                                                     000775  000765  000024  00000000414 14016263151 013671  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    w    66:14808                                                                                                                                                                                                                                                    det_params.py                                                                                       000775  000765  000024  00000007317 14016263151 013465  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def 
det_params(ndates_valid, nlats, nlons, data_input):

    """ try various power transformations to determine which power transformed normal distribution
        best fits the empirical CDF 
    """

    # Overview of the body below:
    #   1. for every grid point, read 19 empirical quantiles (levels
    #      0.05 ... 0.95) from the sorted sample along axis 0 of data_input;
    #   2. for each candidate power, transform those quantiles, standardize
    #      them, and measure the largest absolute gap between the standard
    #      normal CDF and the nominal quantile level;
    #   3. keep the power with the smallest gap and store it together with the
    #      mean and standard deviation of the transformed *full* sample.
    import numpy as np
    # NOTE(review): only the top-level package is imported here, but
    # scipy.stats.norm is used below.  Whether the scipy.stats submodule is
    # reachable this way depends on the SciPy version / on other imports
    # having loaded it; `import scipy.stats` would be explicit -- confirm.
    import scipy
    import sys

    # per-grid-point outputs: selected power, and mean / standard deviation
    # of the power-transformed sample
    power = np.zeros((nlats,nlons), dtype=np.float64)
    mean = np.zeros((nlats,nlons), dtype=np.float64)
    stddev = np.zeros((nlats,nlons), dtype=np.float64)
    
    empirical_quantiles = np.zeros((19,nlats, nlons), dtype=np.float64)
    # NOTE(review): fitted_quantiles and Dn_statistic are allocated but not
    # referenced again in this function.
    fitted_quantiles = np.zeros((19), dtype=np.float64)
    Dn_statistic = np.zeros((19,nlats, nlons), dtype=np.float64)
    # nominal quantile levels 0.05, 0.10, ..., 0.95
    rq = 0.05 + 0.05*np.arange(19)
    # position of each quantile level in a sorted sample of ndates_valid
    # values (truncated to an integer index); assumes axis 0 of data_input
    # holds ndates_valid entries -- TODO confirm against caller
    iquse = (rq*ndates_valid).astype(int)

    # ---- determine the empirical quantiles from the sample
    
    print ('determining empirical quantiles')
    for ilat in range(nlats):
        for ilon in range(nlons):
            sample = data_input[:,ilat,ilon]
            sample_sorted = np.sort(sample)
            for iq in range(19):
                empirical_quantiles[iq,ilat,ilon] = sample_sorted[iquse[iq]]
            # diagnostic printout for the first grid point only
            if ilat == 0 and ilon == 0: 
                print ('empirical quantiles = ', empirical_quantiles[:,ilat,ilon] )
                print ('sample_sorted= ', sample_sorted[0:-1:10])
                
    # ---- determine which power transformed normal distribution provides the best fit to 
    #      the empirical data
    
    print ('testing powers')
    testpowers = [0.5,0.7,0.8,0.9,0.95,1.0,1.05,1.1,1.2,1.3,1.5]
    ntest = len(testpowers)
    # work arrays reused for every grid point: one fit statistic per candidate
    # power, and per-quantile CDF values / absolute CDF differences
    Dnstat = np.zeros((ntest), dtype=np.float64)
    Dnsample = np.zeros((19), dtype=np.float64)
    CDF_fitted = np.zeros((19), dtype=np.float64)
    for ilat in range(nlats):
        print ('ilat = ', ilat)
        for ilon in range(nlons):
            for itest, testpower in enumerate(testpowers):
                # here "sample" is the vector of 19 empirical quantiles,
                # not the full time series
                sample = empirical_quantiles[:,ilat,ilon]
                #print ('sample = ', sample)
                # sign-dependent power transform: exponent testpower for
                # non-negative values, 2-testpower for negative values (this
                # has the functional form of the Yeo-Johnson transform).
                # np.where evaluates both branches for every element, so the
                # unselected branch may raise a fractional power of a
                # negative number and emit a RuntimeWarning; that value is
                # discarded.
                sample_xform = np.where(sample >= 0.0,\
                    ( (sample+1.0)**testpower - 1.0) / testpower, \
                    - ( (-sample+1.0)**(2.0-testpower) - 1.0 ) / (2.0-testpower) )
                smean = np.mean(sample_xform)
                sstd = np.std(sample_xform)
                #if ilat == 0 and ilon == 0: 
                #    print ('ilat,ilon, testpower, smean, sstd = ', ilat,ilon, testpower, smean, sstd)
                # standardize; NOTE(review): sstd is zero if all 19 quantiles
                # are identical, which would divide by zero here
                sample_norm = (sample_xform - smean) / sstd
                #print ('sample_norm = ', sample_norm)
                for iq in range(19):
                    CDF_fitted[iq] = scipy.stats.norm.cdf(sample_norm[iq], loc=0., scale=1.)
                    Dnsample[iq] = np.abs(CDF_fitted[iq] - rq[iq])
                #print ('Dnsample = ', Dnsample)
                # fit statistic for this power: largest absolute difference
                # between fitted CDF and nominal quantile level
                Dnstat[itest] = np.max(Dnsample)
                    
            #print ('Dnstat = ', Dnstat)
            # candidate power with the smallest maximum CDF difference
            idmin = np.argmin(Dnstat)
            #print ('Dn min, index = ', Dnstat[idmin], idmin)
            testpower = testpowers[idmin]
            # re-apply the selected transform to the full time series at this
            # grid point; the returned mean/stddev describe this transformed
            # full sample, not the 19 quantiles used for the selection
            sample = data_input[:,ilat,ilon]
            sample_xform = np.where(sample >= 0.0,\
                ( (sample+1.0)**testpower - 1.0) / testpower, \
                - ( (-sample+1.0)**(2.0-testpower) - 1.0 ) / (2.0-testpower) )
            smean = np.mean(sample_xform)
            sstd = np.std(sample_xform)
            if ilat == 0 and ilon == 0: 
                print ('testpower, smean, sstd = ', testpower, smean, sstd)
            power[ilat,ilon] = testpower
            mean[ilat,ilon] = smean
            stddev[ilat,ilon] = sstd
            #sys.exit()
    # range of the fitted parameters over the whole grid
    print ('max, min power = ', np.max(power), np.min(power))
    print ('max, min mean = ', np.max(mean), np.min(mean))
    print ('max, min stddev = ', np.max(stddev), np.min(stddev))
    
    return power, mean, stddev                                                                                                                                                                                                                                                                                                                 ._det_params_gaussmix.py                                                                            000775  000765  000024  00000000412 14016263151 015607  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2        
                                      ATTR      
                             com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    x    25:750                                                                                                                                                                                                                                                      det_params_gaussmix.py                                                                              000775  000765  000024  00000003064 14016263151 015400  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         def det_params_gaussmix(ndates_valid, nlats, nlons, data_input):

    """ try to fit a Gaussian mixture model of two normal distributions to 
         the data at hand, and return the parameters.
    """

    # NOTE(review): the docstring above speaks of a mixture of two normal
    # distributions, but the code below fits three components
    # (n_components=3 and output arrays with leading dimension 3).
    import numpy as np
    import scipy
    import sys
    from sklearn import mixture

    # per-grid-point outputs, one entry per mixture component
    weights = np.zeros((3,nlats,nlons), dtype=np.float64)
    means = np.zeros((3,nlats,nlons), dtype=np.float64)
    stddevs = np.zeros((3,nlats,nlons), dtype=np.float64)
    # (n_samples, 1) work array handed to the mixture fit; assumes axis 0 of
    # data_input has ndates_valid entries -- TODO confirm against caller
    X = np.zeros((ndates_valid,1), dtype=np.float64)

    # ---- Fit a 3-component Gaussian mixture at every grid point.
    #      Return parameters
    
    print ('determining 3-component Gaussian mixture')
    for ilat in range(nlats):
        print ('ilat = ', ilat)
        for ilon in range(nlons):
            X[:,0] = data_input[:,ilat,ilon]
            # k-means initialization, 5 restarts (n_init=5)
            clf = mixture.GaussianMixture(n_components=3,\
                covariance_type='spherical',init_params='kmeans', n_init=5)
            #clf = mixture.GaussianMixture(n_components=3,\
            #    covariance_type='spherical',init_params='random',n_init=5)
            clf.fit(X)
            w = clf.weights_
            m = clf.means_
            # square root of the fitted covariances; presumably one variance
            # per component for covariance_type='spherical' -- verify against
            # the scikit-learn documentation
            s = np.sqrt(clf.covariances_)
            # diagnostic printout for the central grid point only
            if ilat == nlats//2 and ilon == nlons//2:
                print ('overall mean, stddev ',np.mean(X), np.std(X))
                print ('weights = ', w[:])
                print ('means = ', m[:,0])
                print ('stdevs = ',s[:])
            # NOTE(review): components are stored in whatever order the fit
            # returns them; no sorting by mean or weight is applied here.
            weights[:,ilat,ilon] = w[:]
            means[:,ilat,ilon] = m[:,0]
            stddevs[:,ilat,ilon] = s[:]
    
    return weights, means, stddevs                                                                                                                                                                                                                                                                                                                                                                                                                                                                            ._determine_era5_climatology.py                                                                     000775  000765  000024  00000000413 14016263151 017044  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    <:y    102:220                                                                                                                                                                                                                                                     determine_era5_climatology.py                                                                       000775  000765  000024  00000011742 14016263151 016636  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
determine_era5_climatology.py

"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import _pickle as cPickle
import numpy as np
import numpy.ma as ma
import scipy.signal as signal
import scipy.stats as stats
from astropy.convolution import convolve
from scipy.interpolate import LSQUnivariateSpline, splrep, splev
import math

# --------------------------------------------------------------   

# ---- various initialization

cvariable = '2t'
cpath_era5 = '/Volumes/Backup Plus/ecmwf/'
#date_list_anal = daterange('2000010100','2018123100',24)
date_list_anal = daterange('2000010112','2018123112',24)
ndates = len(date_list_anal)
# interior knots for the periodic spline fitted later: 0.1, 0.2, ..., 0.9
# of the (approximate) 2*pi period
knots = [0.1*3.14159*2., 0.2*3.14159*2.,0.3*3.14159*2., 0.4*3.14159*2.,\
    0.5*3.14159*2.,0.6*3.14159*2,0.7*3.14159*2., 0.8*3.14159*2., 0.9*3.14159*2.]

# ---- loop through dates and process day by day

ndatektr = 0
ndatektr_yearly = np.zeros((18), dtype=np.int32)
# position of each date within the annual cycle, expressed as an angle
radians_doy = np.zeros((ndates), dtype=np.float32)
for idate, date in enumerate(date_list_anal):

    # progress message every 30th date
    if idate%30 == 0: print ('processing date = ', idate, date)
    cyearf = date[0:4]
    iyear_full = int(cyearf)
    julday = dayofyear(iyear_full, int(date[4:6]), int(date[6:8]))
    # divisor is 365 in years divisible by 4 and 364 otherwise
    # NOTE(review): this presumes a particular day-numbering convention of
    # dateutils.dayofyear -- confirm against that module.
    year_divisor = 365. if iyear_full%4 == 0 else 364.
    radians_doy[idate] = 2.*math.pi*float(julday)/year_divisor + \
        (iyear_full-2000)*0.00001 # extra term to aid in sorting

    # ---- read the reanalysis valid at this date

    infile = cpath_era5 +cyearf+'/t2m_era5_halfdegree_'+date+'.cPick'
    if os.path.exists(infile):
        # context manager so the file is closed even if unpickling fails
        with open(infile, 'rb') as inf:
            # NOTE(review): offset is 273.16, not 273.15 -- confirm intended
            analysis = np.flipud(cPickle.load(inf) - 273.16)
            if idate == 0:
                lats = cPickle.load(inf)
                lons = cPickle.load(inf)
        if idate == 0:
            # the first file defines the grid; allocate the work arrays
            nlats, nlons = np.shape(lats)
            lats = np.flipud(lats)
            analyses_3d = np.zeros((ndates,nlats,nlons), dtype=np.float64)
            climo_temps_estimated = np.zeros((365,nlats,nlons))
            climo_temps_stddev = np.zeros((365,nlats,nlons))
    else:
        # no file for this date: the assignment below re-uses the most
        # recently read analysis, so at least say so
        print ('missing data for idate, date = ', idate, date, \
            '; re-using previously read analysis')
    analyses_3d[idate,:,:] = analysis[:,:]

# ---- estimate the mean temperature for each day of the year by fitting a
#      periodic cubic least-squares spline (fixed knots) through all analyses

# 365 evaluation angles spanning one annual cycle
x = np.arange(0.,2.*math.pi,2.*math.pi/365.)
print ('shape of x = ', len(x))

# The ordering of the dates along the annual cycle is the same for every
# grid point, so sort once instead of once per (jy, ix).
indices = np.argsort(radians_doy)
rads_sorted = radians_doy[indices]

for jy in range(nlats):
    for ix in range(nlons):
        # time series at this grid point, reordered by position in the year
        temps_sorted = analyses_3d[indices,jy,ix]
        # task=-1: least-squares spline on the supplied interior knots;
        # per=1: periodic over [xb, xe]
        spltemp = splrep(rads_sorted, temps_sorted, \
            xb=0., xe=2.*math.pi, k=3, task=-1, per=1, t=knots)
        climo_temps_estimated[:,jy,ix] = splev(x, spltemp)
            
            
# ---- turn the analyses into anomalies: subtract the spline-estimated
#      climatological mean of the matching day from every date (the
#      standard deviation around that mean is computed from these later).

for idate, date in enumerate(date_list_anal):
    cyear = cyearf = date[0:4]
    cmm, cmmdd = date[4:6], date[4:8]
    iyear_full, imm, idd = int(cyear), int(cmm), int(date[6:8])
    iyear = iyear_full - 2000
    # index into the 365-entry climatology, capped at the last entry
    julday = min(dayofyear(iyear_full, imm, idd) - 1, 364)
    daily_mean = climo_temps_estimated[julday,:,:]
    analyses_3d[idate,:,:] = analyses_3d[idate,:,:] - daily_mean

# ---- standard deviation of the anomalies for each of 365 days, pooling all
#      dates whose day of year lies within 30 days (cyclically) of that day

# The day of year of each date does not depend on iday: compute it once
# rather than re-parsing every date string on each of the 365 passes.
# NOTE(review): the anomaly step uses dayofyear(...) - 1 while this step uses
# dayofyear(...) unshifted against a 0-based iday; confirm which is intended.
juldays = np.array([min(dayofyear(int(d[0:4]), int(d[4:6]), int(d[6:8])), 364) \
    for d in date_list_anal])

for iday in range(365):
    # cyclic distance between iday and every date's day of year
    offset = iday - juldays
    imin = np.minimum(np.abs(offset), \
        np.minimum(np.abs(offset + 365), np.abs(offset - 365)))
    # 1 for dates inside the window, 0 otherwise; kept 1-D and broadcast over
    # the grid instead of allocating an (ndates, nlats, nlons) mask per day
    iuse = (imin < 30).astype(np.int32)
    use3d = iuse[:,np.newaxis,np.newaxis]

    sumx = np.sum(analyses_3d*use3d, axis=0)
    sumx2 = np.sum(analyses_3d*analyses_3d*use3d, axis=0)
    nd = np.sum(iuse)
    # sample standard deviation from the running sums
    climo_temps_stddev[iday,:,:] = np.sqrt((sumx2 - sumx**2/nd)/(nd-1))
    print (iday, climo_temps_stddev[iday,nlats//2,nlons//2])
     
# ---- save estimated 2000-2018 climatology to file.  Four objects are
#      pickled in this order: daily mean, lats, lons, daily standard deviation.

#outfile = cpath_era5 + 'ERA5_temperature_climatology_00UTC.cPick'
outfile = cpath_era5 + 'ERA5_temperature_climatology_12UTC.cPick'
print ('writing to ', outfile)
# context manager: the file is flushed and closed even if a dump raises
with open(outfile,'wb') as ouf:
    cPickle.dump(climo_temps_estimated, ouf)
    cPickle.dump(lats, ouf)
    cPickle.dump(lons, ouf)
    cPickle.dump(climo_temps_stddev, ouf)
                                              ._determine_mean_soil_moisture.py                                                                   000775  000765  000024  00000000414 14016263151 017503  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      	     com.macromates.visibleIndex  `    Ay    65:44720                                                                                                                                                                                                                                                    determine_mean_soil_moisture.py                                                                     000775  000765  000024  00000004761 14016263151 017277  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         from netCDF4 import Dataset
import numpy as np
from dateutils import daterange
import sys
import os
import os.path
from os import path
import numpy.ma as ma
import _pickle as cPickle

def find_nearest(vec, value):
    """Return the index of the element of `vec` closest to `value`.

    If several elements are equally close, the first one wins.
    """
    distance = np.abs(vec - value)
    return distance.argmin()

# ---- command line inputs: stream, box corners (lon/lat of the low and high
#      corner) and a title used in the output file names

if len(sys.argv) < 7:
    # fail with a usage line instead of a bare IndexError
    sys.exit('usage: ' + sys.argv[0] + \
        ' stream lonlow latlow lonhi lathi title')

cstream = sys.argv[1]
clonlow = sys.argv[2]
clatlow = sys.argv[3]
clonhi = sys.argv[4]
clathi = sys.argv[5]
ctitle = sys.argv[6]
rlonlow = float(clonlow)
rlatlow = float(clatlow)
rlonhi = float(clonhi)
rlathi = float(clathi)


# --- read in sample lat/lon indices

infile = '/Volumes/Backup Plus/gefsv12/1999/bfg_2003123100_fhr00_control2.nc4'
# context manager so the file is closed even if a variable read fails
with Dataset(infile) as nc:
    lon = nc.variables['lon'][:]
    lat = nc.variables['lat'][:]
nlons = len(lon)
nlats = len(lat)

# ---- determine the nearest grid index for box boundaries

imin = find_nearest(lon, rlonlow)
jmin = find_nearest(lat, rlatlow)
imax = find_nearest(lon, rlonhi)
jmax = find_nearest(lat, rlathi)

# ---- determine the date ranges to read in; each stream covers four years
#      starting on 1 January of the year after the stream label

stream_date_ranges = {
    '1999': ('2000010100','2003123100'),
    '2003': ('2004010100','2007123100'),
    '2007': ('2008010100','2011123100'),
    '2011': ('2012010100','2015123100'),
    '2015': ('2016010100','2019123100'),
}
if cstream not in stream_date_ranges:
    # previously an unknown stream left date_list undefined and surfaced as
    # a NameError a few lines later
    raise ValueError('unknown stream ' + repr(cstream) + '; expected one of ' + \
        ', '.join(sorted(stream_date_ranges)))
date_begin, date_end = stream_date_ranges[cstream]
date_list = daterange(date_begin, date_end, 24)
ndates = len(date_list)
# box-mean soil moisture, one value per date
swmean = np.zeros((ndates), dtype=np.float32)
    
# ---- loop thru files and read in; dates without a file get the missing
#      marker -99.99

for idate, date in enumerate(date_list):

    infile = '/Volumes/Backup Plus/gefsv12/'+cstream+'/bfg_'+date+'_fhr00_control2.nc4'
    print (infile)
    if not path.exists(infile):
        swmean[idate] = -99.99
        continue

    # context manager so the dataset is closed even if a variable is missing
    # or a read fails
    with Dataset(infile) as nc:
        sw = nc.variables['soilw10_40cmdow'][0,:,:]
        ls = nc.variables['landsfc'][0,:,:]

    # land-weighted mean over the box.  The row slice runs jmax:jmin, i.e. it
    # presumes the row index of the high latitude is the smaller one --
    # TODO confirm the latitude ordering of these files.
    # NOTE(review): a box with no land gives a zero denominator here.
    land = ls[jmax:jmin,imin:imax]
    swmean[idate] = np.sum(sw[jmax:jmin,imin:imax]*land) / \
        np.sum(land)
    print (idate, 'swmean = ', swmean[idate])
        
# mask every non-positive mean (this covers the -99.99 missing marker) and
# convert the date list to an array
date_list_vec = np.squeeze(np.asarray(date_list))
swmean_masked = ma.masked_where(swmean <= 0.0, swmean)

# ---- save to file: both outputs share the prefix gefsv12/<title>_<stream>

outfile_prefix = 'gefsv12/'+ctitle+'_'+cstream

outfile = outfile_prefix+'_soilmoisture.dump'
print ('writing soil moisture time series to ',outfile)
swmean_masked.dump(outfile)

outfile = outfile_prefix+'_datelist.dump'
print ('writing date_list time series to ',outfile)
date_list_vec.dump(outfile)
               ._determine_raw_forecast_climo.py                                                                   000775  000765  000024  00000000416 14016263151 017452  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange      
     com.macromates.visibleIndex  `    z    144:412786                                                                                                                                                                                                                                                  determine_raw_forecast_climo.py                                                                     000775  000765  000024  00000011717 14016263151 017243  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """
determine_raw_forecast_climo.py

"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
from datetime import datetime
import _pickle as cPickle
import numpy as np
import numpy.ma as ma
import scipy.signal as signal
import scipy.stats as stats
from astropy.convolution import convolve
from scipy.interpolate import LSQUnivariateSpline, splrep, splev
import math

# --------------------------------------------------------------   

# ---- various initialization

# forecast lead time, as a string, from the command line
clead = sys.argv[1]
cvariable = '2t'
#cpath_era5 = '/Volumes/Backup Plus/ecmwf/'
cpath_forecast = '/Volumes/Backup Plus/gefsv12/t2m/'
date_list_anal = daterange('2000010100','2018123100',24)
#date_list_anal = daterange('2000010112','2018123112',24)
ndates = len(date_list_anal)
# interior knots for the periodic spline fitted later: 0.1, 0.2, ..., 0.9
# of the (approximate) 2*pi period
knots = [0.1*3.14159*2., 0.2*3.14159*2.,0.3*3.14159*2., \
    0.4*3.14159*2.,0.5*3.14159*2.,0.6*3.14159*2,\
    0.7*3.14159*2., 0.8*3.14159*2., 0.9*3.14159*2.]

# ---- loop through dates and process day by day

ndatektr = 0
ndatektr_yearly = np.zeros((18), dtype=np.int32)
# position of each date within the annual cycle, expressed as an angle
radians_doy = np.zeros((ndates), dtype=np.float32)
for idate, date in enumerate(date_list_anal):

    # progress message every 30th date
    if idate%30 == 0: print ('processing date = ', idate, date)
    cyear = date[0:4]
    iyear_full = int(cyear)
    julday = dayofyear(iyear_full, int(date[4:6]), int(date[6:8]))
    # divisor is 365 in years divisible by 4 and 364 otherwise
    # NOTE(review): this presumes a particular day-numbering convention of
    # dateutils.dayofyear -- confirm against that module.
    year_divisor = 365. if iyear_full%4 == 0 else 364.
    radians_doy[idate] = 2.*math.pi*float(julday)/year_divisor + \
        (iyear_full-2000)*0.00001 # extra term to aid in sorting

    # ---- read the forecast for this date and lead time

    #infile = cpath_era5 +cyearf+'/t2m_era5_halfdegree_'+date+'.cPick'
    infile = cpath_forecast + cyear + '/'+date+'_lead'+\
        clead+'_conus_0.5deg_hour'+clead+'.cPick'
    if os.path.exists(infile):
        # context manager so the file is closed even if unpickling fails
        with open(infile, 'rb') as inf:
            forecast = cPickle.load(inf)
        if idate == 0:
            # the first file defines the grid; allocate the work arrays
            # (the file for the first date therefore has to exist)
            nlats, nlons = np.shape(forecast)
            forecast_3d = ma.zeros((ndates,nlats,nlons), dtype=np.float64)
            climo_temps_estimated = np.zeros((365,nlats,nlons))
            climo_temps_stddev = np.zeros((365,nlats,nlons))
        forecast_3d[idate,:,:] = forecast[:,:]
    else:
        # Assign ma.masked through the parent array.  Setting `.mask = True`
        # on the slice `forecast_3d[idate,:,:]` only affects that temporary
        # view while the parent still has no mask array, so the missing date
        # would stay unmasked.
        forecast_3d[idate,:,:] = ma.masked
        print ('missing data for idate, date = ',idate, date)
        
        
# ---- estimate mean temperature for each julian day with a periodic cubic
#      least-squares spline (fixed knots) through all forecasts

# 365 evaluation angles spanning one annual cycle
x = np.arange(0.,2.*math.pi,2.*math.pi/365.)
print ('shape of x = ', len(x))

# The ordering of the dates along the annual cycle is the same for every
# grid point, so sort once instead of once per (jy, ix).
indices = np.argsort(radians_doy)
rads_sorted = radians_doy[indices]

for jy in range(nlats):
    for ix in range(nlons):
        # time series at this grid point, reordered by position in the year
        # NOTE(review): forecast_3d is a masked array; splrep presumably
        # works on the underlying data and ignores the mask -- confirm how
        # missing dates should be treated in the fit.
        temps_sorted = forecast_3d[indices,jy,ix]
        # task=-1: least-squares spline on the supplied interior knots;
        # per=1: periodic over [xb, xe]
        spltemp = splrep(rads_sorted, temps_sorted, \
            xb=0., xe=2.*math.pi, k=3, task=-1, per=1, t=knots)
        climo_temps_estimated[:,jy,ix] = splev(x, spltemp)
            
# ---- turn the forecasts into anomalies: subtract the spline-estimated
#      climatological mean of the matching Julian day from every date.  The
#      standard deviation around that mean (+/- 30 days) is computed from
#      these anomalies afterwards.

for idate, date in enumerate(date_list_anal):
    cyear = cyearf = date[0:4]
    cmm, cmmdd = date[4:6], date[4:8]
    iyear_full, imm, idd = int(cyear), int(cmm), int(date[6:8])
    iyear = iyear_full - 2000
    # index into the 365-entry climatology, capped at the last entry
    julday = min(dayofyear(iyear_full, imm, idd) - 1, 364)
    daily_mean = climo_temps_estimated[julday,:,:]
    forecast_3d[idate,:,:] = forecast_3d[idate,:,:] - daily_mean

# ---- standard deviation of the anomalies for each of 365 days, pooling all
#      dates whose day of year lies within 30 days (cyclically) of that day

# The day of year of each date does not depend on iday: compute it once
# rather than re-parsing every date string on each of the 365 passes.
# NOTE(review): the anomaly step uses dayofyear(...) - 1 while this step uses
# dayofyear(...) unshifted against a 0-based iday; confirm which is intended.
juldays = np.array([min(dayofyear(int(d[0:4]), int(d[4:6]), int(d[6:8])), 364) \
    for d in date_list_anal])

for iday in range(365):
    # cyclic distance between iday and every date's day of year
    offset = iday - juldays
    imin = np.minimum(np.abs(offset), \
        np.minimum(np.abs(offset + 365), np.abs(offset - 365)))
    # 1 for dates inside the window, 0 otherwise; kept 1-D and broadcast over
    # the grid instead of allocating an (ndates, nlats, nlons) mask per day
    iuse = (imin < 30).astype(np.int32)
    use3d = iuse[:,np.newaxis,np.newaxis]

    sumx = np.sum(forecast_3d*use3d, axis=0)
    sumx2 = np.sum(forecast_3d*forecast_3d*use3d, axis=0)
    # NOTE(review): nd counts every date in the window, including any date
    # that is masked in forecast_3d and therefore absent from the sums.
    nd = np.sum(iuse)
    # sample standard deviation from the running sums
    climo_temps_stddev[iday,:,:] = np.sqrt((sumx2 - sumx**2/nd)/(nd-1))
    print (iday, climo_temps_stddev[iday,nlats//2,nlons//2])
     
# ---- save estimated 2000-2018 climatology to file.  Two objects are pickled
#      in this order: daily mean, daily standard deviation.

#outfile = cpath_era5 + 'ERA5_temperature_climatology_00UTC.cPick'
outfile = cpath_forecast + 'GEFSv12_temperature_climatology_lead='+\
    clead+'h.cPick'
print ('writing to ', outfile)
# context manager: the file is flushed and closed even if a dump raises
with open(outfile,'wb') as ouf:
    cPickle.dump(climo_temps_estimated, ouf)
    cPickle.dump(climo_temps_stddev, ouf)
                                                                 ._diagnose_bias_statistics.py                                                                       000775  000765  000024  00000000407 14016263151 016615  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                             Mac OS X            	   2                                              ATTR                                   com.apple.lastuseddate#PS           com.macromates.selectionRange           com.macromates.visibleIndex  `    7	|    880                                                                                                                                                                                                                                                         diagnose_bias_statistics.py                                                                         000775  000765  000024  00000017116 14016263151 016405  0                                                                                                    ustar 00Tom                             staff                           000000  000000                                                                                                                                                                         """

"""
import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import _pickle as cPickle
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.basemap import Basemap, interp
from mpl_toolkits.axes_grid1 import make_axes_locatable

rcParams['xtick.labelsize']='medium'
rcParams['ytick.labelsize']='medium'
rcParams['legend.fontsize']='large'

# =====================================================================

def initialize(ilead, alpha):
    """
    Build the fixed error statistics and the starting coefficient vector
    used by the Kalman-filter bias estimate.

    ilead : forecast lead time in hours; only used to scale B.
    alpha : weight that scales the coefficient error covariance.

    Returns (R, B, Bbeta, KF_betahat):
      R          observation-error variance (a constant).
      B          estimated forecast random-error variance, which grows
                 with lead time.
      Bbeta      5x5 float32 diagonal matrix, the simplified error
                 covariance of the seasonally dependent coefficients;
                 every diagonal entry is alpha*0.25.
      KF_betahat length-5 float32 vector of zeros, the initial
                 coefficient estimate.
    """

    ncoefs = 5

    # ---- scalar error variances

    R = 0.25**2
    B = (0.1 + (ilead/96.))**2

    # ---- diagonal coefficient covariance; off-diagonal terms stay zero

    Bbeta = np.zeros((ncoefs,ncoefs), dtype=np.float32)
    np.fill_diagonal(Bbeta, alpha *0.25)

    # ---- coefficients start from zero

    KF_betahat = np.zeros((ncoefs), dtype=np.float32)

    return R, B, Bbeta, KF_betahat

# =====================================================================

def cosfac_sinfac (date):
    """
    Return the annual and semi-annual harmonics for a date.

    date : date string understood by dateutils.splitdate (the callers in
           this script pass 'YYYYMMDDHH' strings); the hour is ignored.

    Returns (cosfac, sinfac, cos2fac, sin2fac, fracyear), where
    fracyear = day-of-year/365, cosfac/sinfac are the cosine and sine of
    2*pi*fracyear, and cos2fac/sin2fac are those of 4*pi*fracyear.

    The phase now uses np.pi; the previous hard-coded 3.14159 was a
    truncated value of pi and introduced a small phase error that grows
    through the year.

    NOTE(review): whether dayofyear() counts from 0 or 1 is not visible
    here, and a 365-day year is assumed even in leap years.
    """

    # ---- day of year as a fraction of a 365-day year

    yyyy,mm,dd,hh = splitdate(date)
    doy = dayofyear(yyyy,mm,dd)
    fracyear = doy/365.

    # ---- first (annual) harmonic

    fac = 2.*np.pi*(doy/365.)
    cosfac = np.cos(fac)
    sinfac = np.sin(fac)

    # ---- second (semi-annual) harmonic

    fac = 4.*np.pi*(doy/365.)
    cos2fac = np.cos(fac)
    sin2fac = np.sin(fac)
    return cosfac, sinfac, cos2fac, sin2fac, fracyear
    
# =====================================================================

def decayavg_bias(alpha, obs, forecast, bias):
    """
    Update a decaying-average bias estimate with one new forecast/obs pair.

    alpha    : weight given to the newest forecast-minus-observation error.
    obs      : verifying observation (or analysis) value.
    forecast : forecast value valid at the same time.
    bias     : previous bias estimate.

    Returns (1-alpha)*bias + alpha*(forecast-obs).
    """

    # ---- newest error sample, and the down-weighted previous estimate

    newest_error = forecast-obs
    carried_over = (1-alpha)*bias

    return carried_over + alpha*newest_error

# =====================================================================

def seasonalKFbias(cosfac, sinfac, cos2fac, sin2fac, Bbeta, B, R, \
    KF_betahat, obsy, fcsty, biasy):
    """
    One Kalman-filter update of the seasonally varying bias coefficients.

    cosfac, sinfac, cos2fac, sin2fac : harmonic factors for the valid date.
    Bbeta      : 5x5 error covariance of the coefficients (not modified).
    B          : forecast random-error variance.
    R          : observation-error variance.
    KF_betahat : length-5 coefficient estimate; UPDATED IN PLACE.
    obsy       : observed (analyzed) value.
    fcsty      : forecast value.
    biasy      : bias estimate used to correct the forecast before it is
                 compared with the observation.

    Returns (KF_betahat, biasy): the same coefficient array that was
    passed in, and the new bias estimate evaluated with the updated
    coefficients.  Two diagnostic lines are printed on every call.
    """

    # ---- row vector mapping the coefficients to a bias value:
    #      constant, then sine/cosine of the first and second harmonics.

    L = np.array([1.0, sinfac, cosfac, sin2fac, cos2fac])

    # ---- Kalman gain: Bbeta L^T / (L Bbeta L^T + B + R).  The denominator
    #      is a scalar, so its inverse is a plain reciprocal.

    BbetaLT = Bbeta @ L
    innovation_variance = (L @ BbetaLT) + B + R
    Kfgain_beta = BbetaLT * (1.0 / innovation_variance)
    print ('np.shape(Kfgain_beta) = ', np.shape(Kfgain_beta))
    print ('obsy, fcsty, biasy = ', obsy, fcsty, biasy)

    # ---- update the coefficients with the new data: observation minus the
    #      bias-corrected forecast, weighted by the gain.

    innovation = obsy - (fcsty - biasy)
    KF_betahat[0:5] = KF_betahat[0:5] - Kfgain_beta[0:5]*innovation

    # ---- re-evaluate the bias with the updated coefficients, adding the
    #      five terms from left to right.

    terms = L[0:5]*KF_betahat[0:5]
    biasy = terms[0]
    for term in terms[1:]:
        biasy = biasy + term
    return KF_betahat, biasy

# =====================================================================

# ---- command-line arguments: forecast lead and the corners of the
#      averaging box (beginning/ending longitude and latitude).

clead = sys.argv[1]  # lead time, e.g., 12, 72, 120 (in hours)
clonb = sys.argv[2]
clatb = sys.argv[3]
clone = sys.argv[4]
clate = sys.argv[5]
rlonb = float(clonb)
rlatb = float(clatb)
rlone = float(clone)
rlate = float(clate)
alpha = 0.02
calpha = str(alpha)

# ---- analysis (valid) dates every 24 h, and for each one the initial
#      date of the forecast that verifies then.

ilead = int(clead)
datadir = '/Users/Tom/python/ecmwf/'
cvariable = '2t'
datestart = dateshift('2018110100',ilead)
date_list_anal = daterange(datestart,'2019123100',24)
ndates = len(date_list_anal)
date_list_fcst = [dateshift(datea,-ilead) for datea in date_list_anal] # initial times of fcst

forecast_box = np.zeros((ndates), dtype=np.float32) 
analysis_box = np.zeros((ndates), dtype=np.float32) 
bias_decayavg = np.zeros((ndates), dtype=np.float32)
bias_seasonalKF = np.zeros((ndates), dtype=np.float32)
frac2019 = np.zeros((ndates), dtype=np.float32)  
    
# ---- call initialization routine

R, B, Bbeta, KF_betahat = initialize(ilead, alpha)

# ---- index of the first 2019 analysis date; the plots start there.
#      Previously this was set only on an exact match with '2019010100',
#      which never occurs when the lead is not a multiple of 24 h (e.g.
#      lead 12 gives 12 UTC valid times) and left dstart undefined.  For
#      leads that are multiples of 24 h the result is unchanged.

dstart = None

for idate, datea in enumerate(date_list_anal):
    
    datef = date_list_fcst[idate]
    if dstart is None and int(datea[0:4]) >= 2019: dstart = idate
    print ('------ processing analysis, forecast dates = ', datea, datef)

    # ---- read the ECMWF ERA5 reanalysis at this analysis date.  The first
    #      pickle in the file is the analysis; lats and lons follow and are
    #      only needed once to locate the box.
    #      NOTE: unpickling can execute arbitrary code; only point datadir
    #      at files you trust.
    
    infile = datadir + 't2m_era5_halfdegree_'+datea+'.cPick'
    with open(infile, 'rb') as inf:
        analysis = cPickle.load(inf)
        if idate == 0:
            lats = cPickle.load(inf)
            lons = cPickle.load(inf)
            nlats, nlons = np.shape(lats) 

            # ---- nearest grid indices to the box corners.  jmin comes from
            #      the ending latitude and jmax from the beginning one,
            #      which presumably reflects a north-to-south latitude
            #      ordering -- TODO confirm.
            imin = np.argmin(np.abs(lons[0,:]-rlonb))
            imax = np.argmin(np.abs(lons[0,:]-rlone))
            jmin = np.argmin(np.abs(lats[:,0]-rlate))
            jmax = np.argmin(np.abs(lats[:,0]-rlatb))
            print ('imin, imax, jmin, jmax = ', imin, imax, jmin, jmax)
            #sys.exit()
    
    # ---- read the ECMWF control forecast at this lead time and initial date;
    #      the file is closed even if decoding the message fails.
 
    infile = datadir + cvariable+'_'+datef+'_f'+clead+'.grib2'  
    grbfile = pygrib.open(infile) 
    try:
        grb = grbfile.select()[0] 
        forecast = grb.values
    finally:
        grbfile.close()
    
    # ---- the verifying "obs" is the ERA5 analysis already read above; the
    #      same file used to be opened and unpickled a second time.
    
    obs = analysis
    
    # ---- box averages (end indices are exclusive)
    
    forecast_box[idate] = np.mean(forecast[jmin:jmax,imin:imax])
    analysis_box[idate] = np.mean(analysis[jmin:jmax,imin:imax])
    
    # ---- fraction of the year; dates before 2019 get negative values so
    #      the x-axis is continuous across the new year.
    
    cosfac, sinfac, cos2fac, sin2fac, fracyear = cosfac_sinfac (datea)
    if int(datea[0:4]) < 2019:
        frac2019[idate] = fracyear-1.0
    else:
        frac2019[idate] = fracyear
    
    # ---- produce estimate of standard decaying-average bias correction.
    #      For idate == 0 the index -1 wraps to the last element, which is
    #      still zero at that point, so the recursion starts from zero bias.

    bias = bias_decayavg[idate-1]
    print ('alpha, obs, analysis_box[idate], forecast_box[idate] = ', alpha, \
        analysis_box[idate], forecast_box[idate], bias)
    bias_decayavg[idate] = decayavg_bias(alpha, analysis_box[idate], \
        forecast_box[idate], bias)
    
    # ---- produce estimate of Kalman filter bias correction with seasonal variability.
    #      The same wrap-around start applies to bias_seasonalKF.
    
    bias = bias_seasonalKF[idate-1]
    KF_betahat, bias_seasonalKF[idate] = seasonalKFbias(cosfac, sinfac, \
        cos2fac, sin2fac, Bbeta, B, R, KF_betahat, \
        analysis_box[idate], forecast_box[idate], bias)
    
# --- make plot    

# ---- echo the x-axis values: the full record, then the part from dstart on

from_dstart = slice(dstart, None)
print ('frac2019[0:] = ',frac2019[0:])
print ('frac2019[dstart:] = ',frac2019[from_dstart])

fig = plt.figure(figsize=(9.,5.6))

# ---- main panel: forecast-minus-analysis box means as dots, with the two
#      bias estimates drawn as lines on top.

axloc = [0.08,0.11,0.68,0.82]
a1 = fig.add_axes(axloc)
panel_title = 'Mean forecasts and observations, lead = '+clead+' h'
a1.set_title(panel_title,fontsize=16)

xvalues = frac2019[from_dstart]
box_difference = forecast_box[from_dstart]-analysis_box[from_dstart]
a1.plot(xvalues,box_difference,'.',color='Black',linewidth=0.3)
#a1.plot(np.arange(ndates)/365.,analysis_box,'.',color='Red',linewidth=0.3)
decay_label = r'Decaying average, $\alpha$='+calpha
a1.plot(xvalues,bias_decayavg[from_dstart], 'k-',lw=2,label=decay_label)
kalman_label = 'Kalman filter permitting seasonal\nand subseasonal bias dependence'
a1.plot(xvalues,bias_seasonalKF[from_dstart], 'r-',lw=2,label=kalman_label)

# ---- zero reference line, then axis labels, limits, grid and legend

a1.plot([0,1],[0,0],'k-',lw=1)
a1.set_xlabel('Fraction of calendar year', fontsize=14)
a1.set_ylabel('Temperature bias (deg C)', fontsize=14)
a1.set_xlim(0,1)
a1.set_ylim(-3,3)
a1.grid (True,color='LightGray')
a1.legend(loc=0)
#crmse = '%.2f' %(rmse)
#crmse = 'RMSE = '+crmse
#a1.annotate(crmse,xy=(0.3,1.0))

axloc = [0.8,0.11,0.19,0.82]
a1 = fig.add_axes(axloc)
m = Basemap(llcrnrlon=rlonb,llcrnrlat=rlatb,urcrnrlo