""" python mswep_to_netCDF.py cmonth For chosen month (01 to 12), extract 6-hourly files of MSWEP data, interpolate to CONUS NDFD grid and save to a new netCDF file. Do this for all 6-hourly data for the chosen month and for years 2002-2019. MSWEP data used in this procedure was v260, downloaded with instructions from gloh2o.org. They have since upgraded to v280, which Hylke Beck says has more light precipitation. The v280 data isn't used here. Dateutils is a Jeff Whitaker library for handling dates in yyyymmddhh format Tom Hamill """ import os, sys from datetime import datetime import numpy as np import numpy.ma as ma import _pickle as cPickle from netCDF4 import Dataset import scipy.stats as stats import pygrib from netCDF4 import Dataset from dateutils import hrs_since_day1CE_todate, \ dateto_hrs_since_day1CE, hrstodate, datetohrs, dateshift from mpl_toolkits.basemap import Basemap, interp # ---- get the month and end time from the commmand line. The first 00 # hour analysis of the month will need to access the data from # the previous month. cmonth = sys.argv[1] # 01 etc imonth = int(cmonth) - 1 # ---- define where the output data should be written mswep_output_directory = '/Volumes/Backup Plus/mswep/' # ---- the very first hour of the first day of the month # with MSWEP is stored in the previous month. Define # that. if cmonth == '01': cmonth_before = '12' else: imonth_before = int(cmonth)-1 if imonth_before < 10: cmonth_before = '0'+str(imonth_before) else: cmonth_before = str(imonth_before) daysomo = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] daysomo_leap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] day1900 = datetime(1900,1,1,0) hours1900 = dateto_hrs_since_day1CE(day1900, mixedcal=True) # ---- get the lat/lons of the output NDFD CONUS grid by reading in # a sample file with the data. These should be oriented S to N as # the Basemap interp requires. Check for that and flip if # necessary. infile = 'ccpa.t00z.06h.ndgd2p5.conus.gb2' print (infile) flatlon = pygrib.open(infile) fcst = flatlon.select()[0] lats_ndfd, lons_ndfd = fcst.latlons() if lats_ndfd[0,0] > lats_ndfd[-1,0]: flipud = True else: flipud = False if flipud == True: lats_ndfd = np.flipud(lats_ndfd) lons_ndfd = np.flipud(lons_ndfd) nlats_ndfd, nlons_ndfd = np.shape(lons_ndfd) flatlon.close() print ('min, max lons_ndfd = ', np.min(lons_ndfd), \ np.max(lons_ndfd)) zeros = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float32) ones = np.ones((nlats_ndfd, nlons_ndfd), dtype=np.float32) # --- read in the land-water mask. Also flip if necessary. infile = 'ndfd_terrain_landwater.grib2.gb2' print (infile) flatlon = pygrib.open(infile) fcst = flatlon.select()[0] landmask = fcst.values landmask = np.where(landmask > 1.0, ones, zeros) lats_landmask, lons_landmask = fcst.latlons() if lats_landmask[0,0] > lats_landmask[-1,0]: flipud = True else: flipud = False if flipud == True: lats_landmask = np.flipud(lats_landmask) lons_landmask = np.flipud(lons_landmask) nlats_landmask, nlons_landmask = np.shape(lons_landmask) flatlon.close() # --- read in the valid CCPA mask. Note that this contains # Gulf and Atlantic points we don't want to use. # These will be filtered out later. infile = 'various_nbm_plus_mask.nc' nc = Dataset(infile) lats_ccpa = nc.variables['latitude'][:,:] lons_ccpa = nc.variables['longitude'][:,:] validmask_ccpa = nc.variables['validmask_ccpa'][:,:] if lats_ccpa[0,0] > lats_ccpa[-1,0]: validmask_ccpa = np.flipud(validmask_ccpa) nc.close() # ---- make the final mask as landmask*validmask_ccpa. # Through this, we will default to using the # alternative MSWEP analysis at all water points. finalmask = validmask_ccpa*landmask # ---- read in a sample mswep lat/lon to see if need to flip # so that Basemap interp works. infile = mswep_directory + '200001_on_ndfd_grid_6hourly.nc' nc = Dataset(infile) lats_mswep = nc.variables['lats'][:,:] if lats_mswep[0,0] > lats_mswep[-1,0]: flipud_mswep = True else: flipud_mswep = False nc.close() mninenine = -99.99*np.ones((nlats_ndfd, nlons_ndfd), dtype=np.float64) # ---- process all years for this month for iyear in range(2002,2020): cyear = str(iyear) print ('****** processing year = ', iyear) # ---- determine the days of the month if iyear%4 == 0: ndays = daysomo_leap[imonth] else: ndays = daysomo[imonth] # ---- open netCDF output file and deal with all the variable definition and # such. outfile = '/Volumes/NBM/conus_panal/'+cyear+cmonth+'_mswep_on_ndfd_grid_6hourly.nc' print (' writing to ',outfile) ncout = Dataset(outfile,'w',format='NETCDF4_CLASSIC') xf = ncout.createDimension('xf',nlons_ndfd) xvf = ncout.createVariable('xf','f4',('xf',)) xvf.long_name = "eastward grid point number on NDFD grid" xvf.units = "n/a" yf = ncout.createDimension('yf',nlats_ndfd) yvf = ncout.createVariable('yf','f4',('yf',)) yvf.long_name = "northward grid point number on 1/4-degree lat-lon grid" yvf.units = "n/a" time = ncout.createDimension('time',None) timev = ncout.createVariable('time','f4',('time',)) timev.units = "index to time dimension, that's all" lonsa = ncout.createVariable('lons','f4',('yf','xf',)) lonsa.long_name = "longitude" lonsa.units = "degrees_east" latsa = ncout.createVariable('lats','f4',('yf','xf',)) latsa.long_name = "latitude" latsa.units = "degrees_north" conusmask = ncout.createVariable('conusmask','i4',('yf','xf',)) latsa.long_name = "mask (1=land, 0=water)" latsa.units = "none" yyyymmddhh_begin = ncout.createVariable('yyyymmddhh_begin','i4',('time',)) yyyymmddhh_begin.longname = \ "Precip accumulation period beginning in yyyymmddhh format" yyyymmddhh_end = ncout.createVariable('yyyymmddhh_end','i4',('time',)) yyyymmddhh_end.longname = \ "Precip accumulation period ending in yyyymmddhh format" # --- declare the single-level variable information on lat-lon grid apcp_anal = ncout.createVariable('apcp_anal','f4',('time','yf','xf',), zlib=True,least_significant_digit=4) apcp_anal.units = "mm" apcp_anal.long_name = \ "Interpolated 6-h accumulated MSWEP analysis on CONUS NDFD grid" apcp_anal.valid_range = [0.,1000.] apcp_anal.missing_value = np.array(-9999.99,dtype=np.float32) # ---- initialize xvf[:] = np.arange(nlons_ndfd) yvf[:] = np.arange(nlats_ndfd) lonsa[:] = lons_ndfd[:,:] latsa[:] = lats_ndfd[:,:] conusmask[:] = finalmask[:,:] # ---- metadata ncout.title = "NDFD CONUS domain, 6 hourly accum." ncout.history = " " ncout.institution = "NCEP/EMC" ncout.platform = "Precipitation analysis" ncout.references = "DOI: 10.1175/JHM-D-11-0140.1" # ---- loop thru all dates, read reforecasts, and munge them into netCDF... ktr = 0 for iday in range(1,ndays+1): infile2 = '' if iday < 10: cday = '0'+str(iday) else: cday = str(iday) cyyyymmdd = cyear + cmonth + cday for chour in ['00','06','12','18',]: precip_final = np.zeros((nlats_ndfd, nlons_ndfd), dtype=np.float64) ihour = int(chour) if ihour == 0: chour_begin = '18' chour_end = '00' elif ihour == 6: chour_begin = '00' chour_end = '06' elif ihour == 12: chour_begin = '06' chour_end = '12' elif ihour == 18: chour_begin = '12' chour_end = '18' cyyyymmddhh_end = cyear + cmonth + cday + chour cyyyymmddhh_begin = dateshift(cyyyymmddhh_end, -6) iyyyymmddhh = int(cyyyymmddhh_end) # ---- read the MSWEP data if iday == 1 and chour_end == '00': # the 18-00 UTC data on the first day of month should be stored with # the data from the previous month. For example, if # iyyyymmddhh == 2002010100, the data period is 2001123118 to # 2002010100 and it'd be stored in the Dec 2001 netCDF file if cmonth == '01': cyear_before = str(int(cyear)-1) infile2 = mswep_directory + cyear_before + \ cmonth_before + '_on_ndfd_grid_6hourly.nc' else: infile2 = mswep_directory + cyear + \ cmonth_before + '_on_ndfd_grid_6hourly.nc' else: infile2 = mswep_directory + cyear + cmonth + '_on_ndfd_grid_6hourly.nc' nc = Dataset(infile2) print (infile2) yyyymmddhh_end_in = nc.variables['yyyymmddhh_end'][:] idx = int(np.where(yyyymmddhh_end_in == iyyyymmddhh)[0]) precip_mswep = nc.variables['apcp_anal'][idx,:,:].astype('d') if flipud_mswep == True: precip_mswep = np.flipud(precip_mswep) nc.close() # ---- write record to file. yyyymmddhh_begin[ktr] = int(cyyyymmddhh_begin) yyyymmddhh_end[ktr] = int(cyyyymmddhh_end) apcp_anal[ktr] = precip_mswep[:,:] ktr = ktr + 1 ncout.close() print ('writing to ', outfile, ' completed.')