# Numerical Methods for Structural Economic Models
# Spring 2020
# Zach Stangebye
# Lecture 5: Machine Learning Example

# We'll solve the Arellano (AER 2008) sovereign default model using GPR/dynamic programming
# Two states: output and debt
# We solve for the stationary equilibrium that is the limit of the finite-horizon game

# Since the method is fully continuous, it easily accommodates the extension
# to long-maturity debt, which often encounters difficulty when working
# with discretized grids

# Though the method here is not much faster than traditional methods,
# it generalizes tractably to much larger state spaces with no substantive change
# in the algorithm, since required memory and computation grow linearly rather than
# exponentially in the dimensionality (D)

# Machine learning optimization: on or off
# If this is on, then Gaussian-process hyperparameters will be optimized in every functional approximation
# This step is optional. Whether it fosters dynamic stability often depends on the problem. In this case,
# it doesn't, so we set it to false
const optimize_on = false

# Model parameters
const r = .017        # Risk-free rate
const β = 0.953       # Discount factor
const σ_CRRA = 2.0    # Risk aversion

function sov_utility(c)
    u = 1.0
    if c <= 0.0
        u = -1.0e10
    else
        u = c^(1-σ_CRRA)/(1-σ_CRRA)
    end
    return u
end

const σy = .025    # Output shock volatility
const ρy = .945    # Output persistence
const σy_uncond = σy/sqrt(1-ρy^2)

const ythresh = 0.969   # Default cost parameter
const y_lev = -.18
const y_curv = .23
function ydef(y)
    # Threshold default cost used in Arellano (2008):
    # ydef = 1.0
    # if y <= ythresh
    #     ydef = y
    # else
    #     ydef = ythresh
    # end
    # Quadratic default cost used here:
    return min(y, y - y_lev*y - y_curv*y^2)
end

const π = .282   # Re-entry probability

# We add in a small amount of recovery so that all equilibrium objects can
# have the same domain dimensionality (2). Typically, the default value function
# takes only one argument (output)
const recov = .01   # Recovery rate (zero in Arellano [2008])

const D = 2   # Dimensionality of the problem

# Bounds on y, b
const yL = exp(-3*σy_uncond)
const yH = exp(3*σy_uncond)
const bL = 0.0
const bH = 0.5

const bN_opt = 10
const b_opt_grid = bL:(bH-bL)/(bN_opt-1):bH

# ML parameters
const N_inputs = 40*D
# Scheidegger and Bilionis (2017) suggest 5*D or 10*D for N_inputs, but we
# find this to be insufficient given the significant non-linearities in these models
# However, the key feature that the number of inputs scales linearly in D remains
const N_validate = 10000

using Random
using StatsFuns
using LinearAlgebra
using Optim
using FastGaussQuadrature
using GaussianProcesses
using Plots
using LaTeXStrings
gr()   # Sets the backend for the plotting interface (not used in solution)

# Take expectations with a Gauss-Chebyshev quadrature
# Given that there are significantly fewer grid points, it's important
# for this to be reasonably large for precise expectations
const quad_size = 30
const gc_points, gc_weights = gausschebyshev(quad_size)

# Minor adjustment to quadrature-based expectations to ensure the (truncated) pdf integrates to unity
const mwidth_cond = 3.0
const pdf_adj = normcdf(mwidth_cond) - normcdf(-mwidth_cond)
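# Optional sanity check (purely illustrative; can be deleted). It verifies the reasoning behind
# pdf_adj: the Gauss-Chebyshev rule, applied to the conditional density of log-output truncated
# at ±mwidth_cond standard deviations and divided by pdf_adj, should integrate to roughly one.
# The names inside the let block (ytod, dens, z_gc, mass) are introduced only for this check.
let
    ytod = 1.0   # an arbitrary current output level
    dens(z) = ( normpdf( (z - ρy*log(ytod))/σy )/σy )/pdf_adj
    zL1 = ρy*log(ytod) - mwidth_cond*σy
    zH1 = ρy*log(ytod) + mwidth_cond*σy
    z_gc = zL1 .+ (1.0 .+ gc_points)/2 .*(zH1-zL1)
    mass = (zH1-zL1)/2.0*sum( gc_weights.*dens.(z_gc).*sqrt.(1.0 .- gc_points.^2) )
    println("Quadrature check: integrated conditional density ≈ $mass (should be close to 1.0)")
end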
Random.seed!(13)   # Set random seed so we get the same answer each time

eyeN1 = zeros(N_inputs,N_inputs)
for i=1:N_inputs
    eyeN1[i,i] = 1.0
end
const eyeN = copy(eyeN1)

const max_iters = 10000   # Cap in event of non-convergence

x_inputs_total = zeros(D,N_inputs,max_iters)
t_set_total = zeros(N_inputs,max_iters)
t_set_totalD = zeros(N_inputs,max_iters)
t_set_totalA = copy(t_set_total)

# Start at limit of finite-horizon game
mzero = MeanZero()           # Used in the GPR (prior)
v_prior(x) = 0.0
Vrepay(x) = v_prior(x)
Vdefault(x) = v_prior(x)     # No values of repayment/default after the last period
bpol(x) = v_prior(x)         # No borrowing in the last period
def_fun(x) = 1.0             # Finite-horizon game; default for sure at the end
q(x) = 0.0                   # No lending can take place in the last period

# Simulate the model (given current policy functions) and return the ergodic distribution
function simulation(sim_length::Int64)
    sim_y = zeros(sim_length,1)
    sim_b = zeros(sim_length,1)
    sim_access = ones(sim_length,1)   # binary; 1 if country has access to credit markets

    sim_y[1] = 1.0        # start at steady-state output
    sim_b[1] = 0.0        # start with no debt
    sim_access[1] = 1.0   # start with access to credit markets

    for t=1:sim_length-1
        if (sim_access[t] > 0.5) || ( (sim_access[t] < 0.5) && (rand() < π) )
            if def_fun([sim_y[t],sim_b[t]]) > 0.5
                sim_b[t+1] = recov*sim_b[t]
                sim_access[t+1] = 0.0
            else
                # Restrictions on max/min typically not binding in the final solution,
                # but they can bind in simulations done during the solution process
                sim_b[t+1] = min( max( bpol([sim_y[t],sim_b[t]]), bL), bH)
                sim_access[t+1] = 1.0
                # Also, we reset market access here if we need to
                if (sim_access[t] < 0.5)
                    sim_access[t] = 1.0
                end
            end
        else
            sim_b[t+1] = sim_b[t]
            sim_access[t+1] = 0.0
        end
        sim_y[t+1] = min( max( exp( ρy*log(sim_y[t]) + σy*norminvcdf(rand()) ), yL), yH)
        # Notice we never use this to compute consumption, so we don't apply default costs
    end
    return hcat(hcat(sim_y,sim_b),sim_access)
end

# Construct a histogram for the ergodic distribution that we can use to draw grid points
# or points for evaluating the convergence criterion
const yNbins = 50
const bNbins = 50
const y_hist_bnds = yL:(yH-yL)/yNbins:yH
const b_hist_bnds = bL:(bH-bL)/bNbins:bH
const y_hist_width = y_hist_bnds[2] - y_hist_bnds[1]
const b_hist_width = b_hist_bnds[2] - b_hist_bnds[1]

const hist_counter = ones(bNbins)
const y_hist_pnts = kron( (y_hist_bnds[1:end-1] .+ y_hist_bnds[2:end])./2.0 , hist_counter)
const b_hist_pnts = kron(hist_counter, (b_hist_bnds[1:end-1] .+ b_hist_bnds[2:end])./2.0 )

erg_cdf = zeros(yNbins*bNbins)
erg_pdf = copy(erg_cdf)
erg_hist = zeros(yNbins,bNbins)

# Initially draw from the unconditional distribution of y
# Initially assume that the distribution of b (endogenous) is uniform
i = 0
for iy=1:yNbins
    y_mass = normcdf(log(y_hist_bnds[iy+1])/σy_uncond) - normcdf(log(y_hist_bnds[iy])/σy_uncond)
    for ib=1:bNbins
        global i += 1
        erg_pdf[i] = y_mass
        erg_hist[iy,ib] = y_mass
    end
end
erg_pdf = erg_pdf/sum(erg_pdf)
erg_cdf[1] = erg_pdf[1]
for i=2:length(erg_cdf)
    erg_cdf[i] = erg_cdf[i-1] + erg_pdf[i]
end
erg_hist = erg_hist/sum(erg_hist)

# Draw from the ergodic distribution
function draw_from_erg(draw_size)
    erg_draw = zeros(D,draw_size)
    erg_ind = Array{Int64}(undef,D,draw_size)
    b_grid_draw = bL: (bH-bL)/(draw_size-1) : bH

    y_hist_ind = 1
    b_hist_ind = 1
    for ix=1:draw_size
        found_bin = false
        icount = 0
        rand_roll = rand()
        while ~found_bin
            if rand_roll < erg_cdf[icount+1]
                # Row-major layout: linear index = (iy-1)*bNbins + ib
                b_hist_ind = icount%bNbins + 1
                y_hist_ind = div(icount,bNbins) + 1
                found_bin = true
            else
                icount += 1
            end
        end
        erg_ind[1,ix] = y_hist_ind
        erg_ind[2,ix] = b_hist_ind
        erg_draw[1,ix] = y_hist_bnds[y_hist_ind] + rand()*y_hist_width
        erg_draw[2,ix] = b_grid_draw[ix]   # Every draw from a uniform is typical,
                                           # so to maximize dispersion we use a uniform grid for b
    end
    return erg_draw, erg_ind
end

const Rsmooth = 0.0   # Smoothing parameters (if necessary)
const Dsmooth = 0.0

# Convergence criteria
const ϵt = 0.0001      # typical-set convergence for the final ergodic distribution
const ϵconv = 1.0e-6   # VFI convergence

ddist = 1.0        # Current distance metric (initialized > ϵconv)
ccounter = 0
min_iters = 0
new_draws = true   # Tells us whether to draw new gridpoints (training inputs); only done on the first iteration
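# Before entering the main loop, a minimal self-contained illustration (purely didactic, not part
# of the solution, and safe to delete) of the GP fit/predict pattern used repeatedly below: fit a
# Gaussian process to a known two-dimensional function on a handful of points and query it at a
# new point. It mirrors the solution code, which passes query points to predict_y as a D-by-n
# matrix, padding a single point with a dummy second column and keeping only the first prediction.
# All *_demo names and the query point xq are introduced only for this illustration.
let
    f_demo(x) = sin(3*x[1]) + x[2]^2                        # known test function
    x_demo = [0.0 0.25 0.5 0.75 1.0 0.0 0.25 0.5 0.75 1.0;
              0.0 0.0  0.0 0.0  0.0 1.0 1.0  1.0 1.0  1.0]  # 2-by-10 matrix of training inputs
    y_demo = [f_demo(x_demo[:,i]) for i=1:size(x_demo,2)]   # training targets
    gp_demo = GP(x_demo, y_demo, MeanZero(), Mat52Ard([0.0,0.0],0.0), -8.0)
    xq = [0.4, 0.6]                                          # query point
    pred = predict_y(gp_demo, hcat(xq,[1.0,.01]))[1][1]      # same padding trick as in the solution
    println("GP demo: prediction = $pred, truth = $(f_demo(xq))")
end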
Vlast = Vrepay([1.0,.04])

sstart0 = time()
final_draw = false
while (ddist > ϵconv) && (ccounter < max_iters)
    sstart = time()
    global new_draws, Vlast
    global ccounter += 1

    # Start by generating input points
    if new_draws
        global erg_pdf = copy(erg_cdf)
        for i=2:length(erg_pdf)
            erg_pdf[i] = erg_cdf[i] - erg_cdf[i-1]
        end

        # We draw from the ergodic distribution until we reach a point in the typical set
        # We then use this draw as our grid
        # First, compute the entropy of the ergodic distribution
        global ergH = 0.0
        for i=1:length(erg_pdf)
            if erg_pdf[i] > 0.0
                global ergH -= erg_pdf[i]*log(erg_pdf[i])
            end
        end

        # Draw a candidate set of input points
        global x_inputs, x_inds = draw_from_erg(N_inputs)
        not_typical = true
        x_mass = zeros(N_inputs)
        while not_typical
            drawH = 0.0
            for i=1:N_inputs
                x_mass[i] = erg_hist[x_inds[1,i],x_inds[2,i]]
                drawH -= log(x_mass[i])/N_inputs
            end
            # println([drawH,ergH])
            if abs(drawH - ergH) < ϵt
                not_typical = false
            else
                global x_inputs, x_inds = draw_from_erg(N_inputs)
            end
        end
    end

    # Check for convergence by drawing random points from the current ergodic
    # distribution
    if ccounter > min_iters
        global conv_test_points, jjunk = draw_from_erg(N_validate)
        v_old_points = zeros(N_validate,1)
        for i=1:N_validate
            v_old_points[i] = Vrepay(conv_test_points[:,i])
        end
    end

    # Since we're considering the backward solution of a finite-horizon game,
    # we update the equilibrium pricing function first
    t_setq = zeros(N_inputs)
    for i=1:N_inputs
    # Threads.@threads for i=1:N_inputs
        ytod = x_inputs[1,i]
        btom = x_inputs[2,i]

        rep_fun(ytom) = (1.0 - def_fun([ytom, btom])*(1-π/(r+π)*recov))*( normpdf( (log(ytom) - ρy*log(ytod))/σy )/σy )/pdf_adj
        zL1 = ρy*log(ytod) - mwidth_cond*σy
        zH1 = ρy*log(ytod) + mwidth_cond*σy
        z_gc_points = zL1 .+ ( 1.0 .+ gc_points)/2 .*(zH1-zL1)
        ER = (zH1-zL1)/2.0*sum( gc_weights.*( rep_fun.(exp.(z_gc_points)) ).*sqrt.(1.0 .- gc_points.^2) )

        t_setq[i] = ER/(1+r)
    end
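    # For reference, the training targets above implement (as coded) the pricing recursion
    #   q(y, b') = E_{y'|y}[ 1 - D(y', b') * (1 - recov * π/(r+π)) ] / (1 + r),
    # where D is next period's default indicator and recov * π/(r+π) approximates the
    # discounted value of the recovery claim the lender receives upon default.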
    # Now update the pricing function using GPR
    # In contrast to Scheidegger and Bilionis (2017), who consider a different
    # set of macro models, we do not find substantial efficiency gains from trying
    # different starting points for this class of models.
    # It typically takes extra time and rarely improves convergence in this context
    kernLBq = [-5.0, -5.0, -5.0]
    kernUBq = [6.0, 6.0, 6.0]
    noiseLBq = -11.0
    noiseUBq = -6.0

    kernq = Mat52Ard([0.0,0.0],0.0)
    logObsNoiseq = -8.0
    global gpq = GP(x_inputs,t_setq,mzero,kernq,logObsNoiseq)
    if optimize_on
        optimize!(gpq;noisebounds=[noiseLBq,noiseUBq],kernbounds = [kernLBq,kernUBq])
    end
    global function q(x)
        xstack = hcat(x,[1.0,.01])
        new_predicts = predict_y(gpq,xstack)[1]
        return max(min(new_predicts[1],1.0/(1+r)),0.0)
    end

    # Next we update the repayment/default values and the policy function
    t_set = zeros(N_inputs)
    t_setD = copy(t_set)
    t_setA = copy(t_set)
    for i=1:N_inputs
    # Threads.@threads for i=1:N_inputs
        ytod = x_inputs[1,i]
        btod = x_inputs[2,i]

        # First train the repayment value
        function neg_v_update(bprime)
            # Quadrature method for expectation
            Vtom(ytom) = max( Vrepay([ytom, bprime]), Vdefault([ytom,bprime]) )*( normpdf( (log(ytom) - ρy*log(ytod))/σy )/σy )/pdf_adj
            zL1 = ρy*log(ytod) - mwidth_cond*σy
            zH1 = ρy*log(ytod) + mwidth_cond*σy
            z_gc_points = zL1 .+ ( 1.0 .+ gc_points)/2 .*(zH1-zL1)
            CV = (zH1-zL1)/2.0*sum( gc_weights.*Vtom.(exp.(z_gc_points)).*sqrt.(1.0 .- gc_points.^2) )
            return -( sov_utility( ytod - btod + q([ytod,bprime])*bprime ) + β*CV )
        end

        # Do a simple global grid search first to search in the right place
        minval, minind = findmin( neg_v_update.(b_opt_grid) )
        if minind == 1
            minind = 2
        elseif minind == bN_opt
            minind = bN_opt-1
        end
        # Now minimize continuously but locally
        rresult = Optim.optimize(neg_v_update,b_opt_grid[minind-1],b_opt_grid[minind+1])

        t_setA[i] = rresult.minimizer
        t_set[i] = Rsmooth*Vrepay(x_inputs[:,i]) - (1.0-Rsmooth)*rresult.minimum

        # Now update the default value
        Vtomdef(ytom) = (π*max(Vrepay([ytom, recov*btod]),Vdefault([ytom,recov*btod])) + (1-π)*Vdefault([ytom,btod]) )*( normpdf( (log(ytom) - ρy*log(ytod))/σy )/σy )/pdf_adj
        zL1 = ρy*log(ytod) - mwidth_cond*σy
        zH1 = ρy*log(ytod) + mwidth_cond*σy
        z_gc_points = zL1 .+ ( 1.0 .+ gc_points)/2 .*(zH1-zL1)
        CV = (zH1-zL1)/2.0*sum( gc_weights.*Vtomdef.(exp.(z_gc_points)).*sqrt.(1.0 .- gc_points.^2) )
        vdef_new = sov_utility( ydef(ytod) ) + β*CV

        t_setD[i] = Rsmooth*Vdefault(x_inputs[:,i]) + (1.0-Rsmooth)*vdef_new
    end

    t_set_total[:,ccounter] = copy(t_set)
    t_set_totalD[:,ccounter] = copy(t_setD)
    t_set_totalA[:,ccounter] = copy(t_setA)

    tot_time_bellman = time()-sstart
    sstart1 = time()

    # Now update value/policy functions using GPR
    kernLB = [-5.0, -5.0, -5.0]
    kernUB = [6.0, 6.0, 6.0]
    noiseLB = -11.0
    noiseUB = -6.0

    kernLBD = [-5.0, -5.0, -5.0]
    kernUBD = [10.0, 10.0, 10.0]
    noiseLBD = -11.0
    noiseUBD = -6.0

    kernLBA = [-5.0, -5.0, -5.0]
    kernUBA = [6.0, 6.0, 6.0]
    noiseLBA = -11.0
    noiseUBA = -6.0

    kern = Mat52Ard([0.0,0.0],0.0)
    logObsNoise = -8.0
    kernD = Mat52Ard([0.0,0.0],0.0)
    logObsNoiseD = -8.0
    kernA = Mat52Ard([0.0,0.0],0.0)
    logObsNoiseA = -8.0

    global gp = GP(x_inputs,t_set,mzero,kern,logObsNoise)
    global gpD = GP(x_inputs,t_setD,mzero,kernD,logObsNoiseD)
    global gpA = GP(x_inputs,t_setA,mzero,kernA,logObsNoiseA)
    if optimize_on
        optimize!(gp;noisebounds=[noiseLB,noiseUB],kernbounds = [kernLB,kernUB])
        optimize!(gpD;noisebounds=[noiseLBD,noiseUBD],kernbounds = [kernLBD,kernUBD])
        optimize!(gpA;noisebounds=[noiseLBA,noiseUBA],kernbounds = [kernLBA,kernUBA])
    end
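    # Note on the GP wrapper functions (q above; Vrepay, Vdefault, and bpol below): each single
    # query point is stacked with a dummy second column ([1.0, .01]) so that predict_y receives
    # a D-by-n matrix, and only the first prediction is kept. The min/max clamps in q keep the
    # bond price within [0, 1/(1+r)].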
    global function Vrepay(x)
        xstack = hcat(x,[1.0,.01])
        new_predicts = predict_y(gp,xstack)[1]
        return new_predicts[1]
    end
    global function Vdefault(x)
        xstack = hcat(x,[1.0,.01])
        new_predicts = predict_y(gpD,xstack)[1]
        return new_predicts[1]
    end
    global function bpol(x)
        xstack = hcat(x,[1.0,.01])
        new_predicts = predict_y(gpA,xstack)[1]
        return new_predicts[1]
    end
    global def_fun(x) = Vdefault(x) > Vrepay(x)

    tot_time_update_funcs = time()-sstart1
    sstart2 = time()

    # Check for convergence with the same sample points as before
    if ccounter > min_iters
        v_new_points = copy(v_old_points)
        for i=1:N_validate
            v_new_points[i] = Vrepay(conv_test_points[:,i])
        end
        print_dist = maximum(abs.(v_old_points .- v_new_points))/(maximum(v_new_points)-minimum(v_new_points))
        global ddist = print_dist
    else
        print_dist = ddist
    end

    # Now simulate to get the new ergodic distribution to draw from for the convergence check or new training inputs
    if (ccounter > min_iters)
        # Now, we derive the ergodic distribution and sample from it
        current_simul = simulation(10000)
        erg_hist = zeros(yNbins,bNbins)

        # First, build the new histogram/cdf of the ergodic distribution
        for i=1:size(current_simul,1)
            iy_ind = 1
            y_done = false
            while ~y_done
                if current_simul[i,1] <= y_hist_bnds[iy_ind+1]
                    y_done = true
                else
                    iy_ind += 1
                end
            end
            ib_ind = 1
            b_done = false
            while ~b_done
                if current_simul[i,2] <= b_hist_bnds[ib_ind+1]
                    b_done = true
                else
                    ib_ind += 1
                end
            end
            # Only include it in the distribution if it's a period of repayment
            if current_simul[i,3] > .5
                erg_hist[iy_ind,ib_ind] += 1.0
            end
        end
        global erg_hist = erg_hist/sum(erg_hist)

        for iy=1:yNbins
            for ib=1:bNbins
                if (iy == 1) && (ib == 1)
                    global erg_cdf[1] = erg_hist[1,1]
                else
                    global erg_cdf[(iy-1)*bNbins + ib] = erg_hist[iy,ib] + erg_cdf[(iy-1)*bNbins + ib - 1]
                end
            end
        end
        global erg_cdf = erg_cdf/erg_cdf[end]
    end
    tot_time_update_sims_and_q = time() - sstart2

    # Don't draw new training inputs after the first iteration
    global new_draws = false

    println([ccounter,print_dist,Vrepay([1.0,.04]),tot_time_bellman,tot_time_update_funcs,tot_time_update_sims_and_q])
end
tot_time_vfi = time() - sstart0

# cd("/Users/zstangeb/Dropbox/AAA_CoursePrep/numerical_methods_class/lectures/machine_learning/code")

heatmap(b_hist_bnds[1:end-1],y_hist_bnds[1:end-1],erg_hist,c=cgrad([:white, :blue,:yellow, :red]),xlabel=L"\textbf{Debt}",ylabel=L"\textbf{Income}",
    title=L"\textbf{Ergodic Distribution}")
scatter!(x_inputs[2,:],x_inputs[1,:],label=L"\textbf{Training Inputs}")
# savefig("erg_dist.pdf")

y_grid = yL:.001:yH
btest = 0.05
V_y_alone(y) = Vrepay([y, btest])
Vd_y_alone(y) = Vdefault([y,btest])
plot(y_grid,V_y_alone.(y_grid),title=L"\textbf{Value Function ( } B_t = .05 \textbf{ )}",ylabel=L"V_t",xlabel=L"y_{t}",
    label=L"\textbf{Repay}",linewidth=3.0,linecolor=:green,legend=:topleft)
plot!(y_grid,Vd_y_alone.(y_grid),label=L"\textbf{Default}",linewidth=3.0,linecolor=:red)
# savefig("Value_functions_y.pdf")

b_grid = bL:.001:bH
V_b_alone(b) = Vrepay([1.0, b])
Vd_b_alone(b) = Vdefault([1.0,b])
plot(b_grid,V_b_alone.(b_grid),title=L"\textbf{Value Function (Steady State } y_t \textbf{ )}",ylabel=L"V_t",xlabel=L"B_{t}",
    label=L"\textbf{Repay}",linewidth=3.0,linecolor=:green,legend=:bottomleft)
plot!(b_grid,Vd_b_alone.(b_grid),label=L"\textbf{Default}",linewidth=3.0,linecolor=:red)
# savefig("Value_functions_b.pdf")

b_grid = bL:.001:bH
a_b_alone_h(b) = bpol([1 + σy_uncond, b])
a_b_alone_l(b) = bpol([1 - σy_uncond, b])
plot(b_grid,a_b_alone_h.(b_grid),title=L"\textbf{Policy Function}",ylabel=L"B_{t+1}",xlabel=L"B_{t}",
    label=L"y=1.0 + \sigma_{y,uncond}",linewidth=3.0,linecolor=:green,legend=:topleft)
plot!(b_grid,a_b_alone_l.(b_grid),label=L"y=1.0 - \sigma_{y,uncond}",linewidth=3.0,linecolor=:red)
plot!(b_grid,b_grid,label=L"\textbf{45-Degree Line}",linewidth=3.0,linecolor=:black,linestyle=:dash)
savefig("policy_functions.pdf") b_grid = bL:.001:bH q_ss(b) = q([1.0, b]) q_l(b) = q([1 - σy_uncond, b]) q_h(b) = q([1 + σy_uncond, b]) plot(b_grid,q_h.(b_grid),title=L"\textbf{Pricing Function}",ylabel=L"q_t",xlabel=L"B_{t+1}", label=L"y=1.0 + \sigma_{y,uncond}",linewidth=3.0,linecolor=:green) plot!(b_grid,q_ss.(b_grid),label=L"y=1.0",linewidth=3.0,linecolor=:blue) plot!(b_grid,q_l.(b_grid),label=L"y=1.0 - \sigma_{y,uncond}",linewidth=3.0,linecolor=:red) # savefig("pricing_functions.pdf") # Now we check for accuracy Random.seed!(15) # Set random seed so get same answer each time N_acc = 1000 acc_test_points, jjunk = draw_from_erg(N_acc) v_interpolated = zeros(N_acc,1) for i=1:N_acc v_interpolated[i] = Vrepay(acc_test_points[:,i]) end cec_interpolated = ((1-β).*(1-σ_CRRA).*v_interpolated).^(1/(1-σ_CRRA)) v_exact = copy(v_interpolated) for i=1:N_acc ytod = acc_test_points[1,i] btod = acc_test_points[2,i] # First train the repayment value function neg_v_update(bprime) # # Quadrature method for expectation Vtom(ytom) = max( Vrepay([ytom, bprime]), Vdefault([ytom,bprime]) )*( normpdf( (log(ytom) - ρy*log(ytod))/σy )/σy )/pdf_adj zL1 = ρy*log(ytod) - mwidth_cond*σy zH1 = ρy*log(ytod) + mwidth_cond*σy z_gc_points = zL1 .+ ( 1.0 .+ gc_points)/2 .*(zH1-zL1) CV = (zH1-zL1)/2.0*sum( gc_weights.*Vtom.(exp.(z_gc_points)).*sqrt.(1.0 .- gc_points.^2) ) return -( sov_utility( ytod - btod + q([ytod,bprime])*bprime ) + β*CV ) end # Do a simple global grid search first to search in the right place minval, minind = findmin( neg_v_update.(b_opt_grid) ) if minind == 1 minind = 2 elseif minind == bN_opt minind = bN_opt-1 end # Now minimize continuously but locally rresult = Optim.optimize(neg_v_update,b_opt_grid[minind-1],b_opt_grid[minind+1]) v_exact[i] = - rresult.minimum end cec_exact = ((1-β).*(1-σ_CRRA).*v_exact).^(1/(1-σ_CRRA)) avg_err = sum(log10.(abs.((cec_interpolated .- cec_exact)./cec_exact)))/N_acc max_err = maximum(log10.(abs.((cec_interpolated .- cec_exact)./cec_exact)))