import numpy as np
import scipy as sp
import math
import os, sys
import matplotlib.pyplot as plt

from numpy import ma
from numpy.random import random_sample
from scipy import stats
from scipy.stats import gamma
from scipy.special import loggamma
from scipy.interpolate import interp1d
from scipy.optimize import minimize


####  Simulate from a 2-component gamma distribution  ###
#
w1 = 0.4
w2 = 0.6
shape1 = 0.2
shape2 = 0.6
scale1 = 3.5
scale2 = 2.2

n = 1000

cmp = np.random.binomial(1,w2,size=n)
n1 = np.sum(cmp==0)
n2 = np.sum(cmp==1)
x = np.zeros(n, dtype=np.float32)
x[cmp==0] = np.random.gamma(shape1,scale1,n1)
x[cmp==1] = np.random.gamma(shape2,scale2,n2)

#
##########################################################


## Fit single component model

pmean = np.mean(x)
lnxbar = np.log(pmean)
meanlnxi = np.mean(np.log(x))
D = lnxbar - meanlnxi
alpha0 = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
invbeta0 = pmean / alpha0


## Fit single component only to upper 20% data, ignoring the censoring issue

c = np.percentile(x,80)

pmeanC1 = np.mean(x[x>c])
lnxbarC1 = np.log(pmeanC1)
meanlnxiC1 = np.mean(np.log(x[x>c]))
D = lnxbarC1 - meanlnxiC1
alphaC1 = (1.0 + np.sqrt(1.0+4.0*D/3.0)) / (4.0*D)
invbetaC1 = pmeanC1 / alphaC1


## Fit single component only to upper 20% data using censored maximum likelihood

def loglik(par,x_ucs,c,n_usc,n_cs):     # censored, negative log-likelihood function
    T1 = -n_cs * np.log(gamma.cdf(c,par[0],scale=par[1]))
    T2 = np.sum(x_ucs)/par[1] - (par[0]-1)*np.sum(np.log(x_ucs)) + n_ucs*(loggamma(par[0])+par[0]*np.log(par[1]))
    return T1 + T2

bnds = ((0.001,1.5), (0.001,3.0*invbeta0))
pst = [alpha0,invbeta0]                       # start with the parameters fitted to the full data set

x_ucs = x[x>c]
n_ucs = len(x_ucs)
n_cs = len(x) - n_ucs

par_opt = minimize(loglik, pst, args=(x_ucs,c,n_ucs,n_cs), method='L-BFGS-B', bounds=bnds, tol=1e-6).x

alphaC2 = par_opt[0]
invbetaC2 = par_opt[1]

##  Make some plots

prob = np.arange(1,n+1)/(n+1)
xgrd = np.arange(0,15,0.1)

# Plot empirical and fitted CDFs
plt.figure()
plt.scatter(np.sort(x), prob)
plt.plot(xgrd, gamma.cdf(xgrd,alpha0,scale=invbeta0), c='m',label='Single Gamma')
plt.plot(xgrd, gamma.cdf(xgrd,alphaC1,scale=invbetaC1), c='r',label='Single to upper 20%, ignore censor')
plt.plot(xgrd, gamma.cdf(xgrd,alphaC2,scale=invbetaC2), c='b',label='Single to upper 20%, censor')
plt.legend(loc=0)
plt.show()


# Q-Q Plots
plt.figure()
plt.plot(xgrd, xgrd, c='k')
plt.scatter(gamma.ppf(prob,alpha0,scale=invbeta0), np.sort(x), c='m',label='Single Gamma')
plt.scatter(gamma.ppf(prob,alphaC1,scale=invbetaC1), np.sort(x), c='r',label='Single to upper 20%, ignore censor')
plt.scatter(gamma.ppf(prob,alphaC2,scale=invbetaC2), np.sort(x), c='b',label='Single to upper 20%, censor')
plt.legend(loc=0)
plt.show()