"""
using ECMWF forecast and ERA5 verification data, implement Kalman-filter
type regression bias correction approach to estimate forecast bias.   Grid covers
CONUS, approximately.  Coded by Tom Hamill, May-Jun 2020.
tom.hamill@noaa.gov
"""

import pygrib
from dateutils import daterange, dateshift, dayofyear, splitdate
import os, sys
import numpy as np
import numpy.ma as ma
import _pickle as cPickle
from os import path
import statsmodels.api as sm
    
# =====================================================================

def KF_linear_regres(Bbeta, B, R, KF_betahat, \
    obs, fcst, bias_estimate):

    # ---- estimate the Kalman gain for the bias correction.

    L = np.array([1.0, ])
    BbetaLT = np.matmul(Bbeta[:,:], np.transpose(L))
    LBbetaLT = np.matmul(L,BbetaLT)
    LBbetaLT_plus_B_plus_R = LBbetaLT + B + R
    LBbetaLT_plus_B_plus_R_inv = 1.0 / LBbetaLT_plus_B_plus_R
    Kfgain_beta = BbetaLT * LBbetaLT_plus_B_plus_R_inv

    # ---- update bias estimate with new data

    for i in range(3):
        KF_betahat[i,:,:] = KF_betahat[i,:,:] - \
            Kfgain_beta[i]*(obs[:,:] - (forecast[:,:] - bias_estimate[:,:]))
    bias_estimate = L[0]*KF_betahat[0,:,:] + \
        L[1]*KF_betahat[1,:,:] + L[2]*KF_betahat[2,:,:] + \
        L[3]*KF_betahat[3,:,:] + L[4]*KF_betahat[4,:,:]
    return KF_betahat, bias_estimate


# =====================================================================

clead = sys.argv[1]  # lead time, e.g., 12, 72, 120 (in hours)
ilead = int(clead)
ndstart = ilead // 24
datadir_reanl = '/Users/Tom/python/ecmwf/'
datadir = '/Users/Tom/python/ecmwf/'

cvariable = '2t'
tally_statistics = False
#datestart = dateshift('2018110100',ilead)
dateend = dateshift('2018123100',-ilead)
date_list_anal = daterange('2018010100',dateend,24) # initial time of the current forecast
ndates = len(date_list_anal)
date_list_fcst = []
for idate in range(ndates):
    date_list_fcst.append(dateshift(date_list_anal[idate],ilead)) # initial times of fcst

# ---- read in the time-dependent ERA5 climatology of t2m

infilename = 'ecmwf/t2m_climo_daily_era5_halfdegree.cPick'
print (infilename)
inf = open(infilename, 'rb')
climo_yearly = cPickle.load(inf)
print ('shape climo_yearly = ', np.shape(climo_yearly))
latsa_halfdegree = cPickle.load(inf)
lonsa_halfdegree = cPickle.load(inf)
inf.close()

# ---- loop over dates and update bias estimates

for idate, datea in enumerate(date_list_anal):
    
    datef = date_list_fcst[idate]
    #print ('------ processing analysis, forecast dates = ', datea, datef)
    if int(datea) >= 2019010100: tally_statistics = True

    # ---- read the ECMWF ERA5 reanalysis at valid at the forecast date.
    
    infile = datadir_reanl + 't2m_era5_halfdegree_'+datef+'.cPick'
    #print (infile)
    inf = open(infile, 'rb')
    analysis = cPickle.load(inf)
    if idate == 0:
        lats = cPickle.load(inf)
        lons = cPickle.load(inf)
        nlats, nlons = np.shape(lats)
        print (nlats, nlons)
        analyzed_yearly = ma.zeros((ndates,nlats,nlons), dtype=np.float32) 
        forecast_yearly = ma.zeros((ndates,nlats,nlons), dtype=np.float32)    
    inf.close()
    analyzed_yearly[idate,:,:] = analysis[:,:] 
    
    # ---- read the control forecast at this lead time and initial date

 
    infile = datadir + cvariable+'_'+datea+'_f'+clead+'.grib2'  
    fexist = path.exists(infile)
    print (infile, fexist)
    if fexist == True:
        #print (infile)
        grbfile = pygrib.open(infile) 
        grb = grbfile.select()[0] 
        forecast = grb.values
        grbfile.close()
        forecast_yearly[idate,:,:] = forecast[:,:] 
    else:
        forecast_yearly[idate,:,:] = ma.masked
    
# ---- for each grid point, first produce a linear regression in order 
#      to get a sense of the standard error of the regression coefficients.
    
forecast_deviation = forecast_yearly - climo_yearly[ndstart:ndstart+ndates,:,:]
analyzed_deviation = analyzed_yearly - climo_yearly[ndstart:ndstart+ndates,:,:]

var_const = np.zeros((nlats,nlons), dtype = np.float32)
var_slope = np.zeros((nlats,nlons), dtype = np.float32)
cov_const_slope = np.zeros((nlats,nlons), dtype = np.float32)
regr_const = np.zeros((nlats,nlons), dtype = np.float32)
regr_slope = np.zeros((nlats,nlons), dtype = np.float32)

for ix in range(nlons):
    for jy in range(nlats):
        a = analyzed_deviation[:,jy,ix]
        f = forecast_deviation[:,jy,ix]
        x = ma.zeros((ndates,2), dtype=np.float32)
        x[:,0] = 1.0
        x[:,1] = f[:]
        model = sm.OLS(a, x, hasconst=True)
        results = model.fit()
        params = results.params
        cov = results.cov_params()
        var_const[jy,ix] = cov[0,0]
        var_slope[jy,ix] = cov[1,1]
        cov_const_slope[jy,ix] = cov[1,0]
        regr_const[jy,ix] = params[0]
        regr_slope[jy,ix] = params[1]


print ('mean slope, intercept = ', np.mean(regr_slope), np.mean(regr_const))
print ('std slope, intercept = ', np.std(regr_slope), np.std(regr_const))
ind = np.unravel_index(np.argmin(regr_const, axis=None), regr_const.shape)
print ('ind = ', ind)
print ('regr_const at min = ', regr_const[ind])

        
outfile = 'covariance_regression_coefficients_lead='+clead+'.cPick'
ouf = open(outfile, 'wb')
cPickle.dump(var_const, ouf)
cPickle.dump(var_slope, ouf)
cPickle.dump(cov_const_slope, ouf)
ouf.close()

print ('mean const, slope = ', np.mean(regr_const), np.mean(regr_slope))
print ('variance of const, slope = ', np.mean(var_const), np.mean(var_slope))
        
#KF_betahat, bias_seasonalKF = seasonalKFbias(cosfac, sinfac, \
#    cos2fac, sin2fac, Bbeta, B, R, KF_betahat, obs, \
#    forecast, bias_seasonalKF)