4. Evaluating a Policy

Testing a policy across many uncertain futures

A single simulation tells us what happens under one possible future. But we’re uncertain about the future! To evaluate a policy properly, we need to test it across many possible futures.

Setup

using SimOptDecisions
using Distributions
using Random
using CairoMakie
using Statistics

# === All type definitions (self-contained) ===

Base.@kwdef struct HouseElevationConfig{T<:AbstractFloat} <: AbstractConfig
    horizon::Int = 70
    gauge_height_above_ref::T = 0.0
    house_height_above_ref::T = 4.0
    house_area_ft2::T = 1500.0
    house_value::T = 200_000.0
end

struct HouseElevationSOW{T<:AbstractFloat} <: AbstractSOW
    gev_μ::T
    gev_σ::T
    gev_ξ::T
    dd_threshold::T
    dd_saturation::T
    discount_rate::T
end

function sample_sow(rng::AbstractRNG)
    gev_μ = rand(rng, Normal(2.8, 0.3))
    gev_σ = rand(rng, truncated(Normal(1.0, 0.15); lower=0.3))
    gev_ξ = rand(rng, truncated(Normal(0.15, 0.05); lower=-0.2, upper=0.5))
    dd_threshold = rand(rng, Normal(0.0, 0.25))
    dd_saturation = rand(rng, Normal(8.0, 0.5))
    discount_rate = rand(rng, truncated(Normal(0.03, 0.015); lower=0.01, upper=0.07))
    return HouseElevationSOW(gev_μ, gev_σ, gev_ξ, dd_threshold, dd_saturation, discount_rate)
end

struct SeaLevelState{T<:AbstractFloat} <: AbstractState
    msl::T
end

struct ElevationAction{T<:AbstractFloat} <: AbstractAction
    elevation_ft::T
end

struct ElevationPolicy{T<:AbstractFloat} <: AbstractPolicy
    elevation_ft::T
end

function SimOptDecisions.get_action(
    policy::ElevationPolicy, state::SeaLevelState, sow::HouseElevationSOW, t::TimeStep
)
    return ElevationAction(policy.elevation_ft)
end

# Physics functions
function depth_damage(depth::T, threshold::T, saturation::T) where {T<:AbstractFloat}
    depth <= threshold && return zero(T)
    depth >= saturation && return one(T)
    midpoint = (threshold + saturation) / 2
    steepness = T(6) / (saturation - threshold)
    return one(T) / (one(T) + exp(-steepness * (depth - midpoint)))
end

function elevation_cost(Δh::Real, area_ft2::Real, house_value::Real)
    Δh <= 0 && return 0.0
    base_cost = 20_745.0
    thresholds = [0.0, 5.0, 8.5, 12.0, 14.0]
    rates = [80.36, 82.5, 86.25, 103.75, 113.75]
    rate = rates[1]
    for i in 1:(length(thresholds) - 1)
        if Δh <= thresholds[i + 1]
            t = (Δh - thresholds[i]) / (thresholds[i + 1] - thresholds[i])
            rate = rates[i] + t * (rates[i + 1] - rates[i])
            break
        end
        rate = rates[i + 1]
    end
    return (base_cost + area_ft2 * rate) / house_value
end
elevation_cost(Δh::Real) = elevation_cost(Δh, 1500.0, 200_000.0)

# Callbacks
function SimOptDecisions.initialize(::HouseElevationConfig, ::HouseElevationSOW, ::AbstractRNG)
    return SeaLevelState(0.0)
end

function SimOptDecisions.time_axis(config::HouseElevationConfig, ::HouseElevationSOW)
    return 1:(config.horizon)
end

function SimOptDecisions.run_timestep(
    state::SeaLevelState, action::ElevationAction, sow::HouseElevationSOW,
    config::HouseElevationConfig, t::TimeStep, rng::AbstractRNG
)
    construction_cost = SimOptDecisions.Utils.is_first(t) ?
        elevation_cost(action.elevation_ft, config.house_area_ft2, config.house_value) : 0.0

    surge_dist = GeneralizedExtremeValue(sow.gev_μ, sow.gev_σ, sow.gev_ξ)
    surge_at_gauge = rand(rng, surge_dist)
    water_level = config.gauge_height_above_ref + surge_at_gauge + state.msl
    house_floor_level = config.house_height_above_ref + action.elevation_ft
    flood_depth_at_house = water_level - house_floor_level
    damage_fraction = depth_damage(flood_depth_at_house, sow.dd_threshold, sow.dd_saturation)

    return (state, (construction_cost=construction_cost, damage_fraction=damage_fraction))
end

function SimOptDecisions.finalize(
    final_state::SeaLevelState, step_records::Vector,
    config::HouseElevationConfig, sow::HouseElevationSOW
)
    construction_cost = step_records[1].construction_cost
    npv_damages = sum(
        step_records[t].damage_fraction * SimOptDecisions.Utils.discount_factor(sow.discount_rate, t)
        for t in eachindex(step_records)
    )
    return (construction_cost=construction_cost, npv_damages=npv_damages)
end

Generate Many SOWs

First, we sample an ensemble of possible futures:

n_sows = 1000
rng = Random.Xoshiro(42)
sows = [sample_sow(rng) for _ in 1:n_sows]

println("Generated $(n_sows) SOWs")
println("Discount rate range: $(round(minimum(s.discount_rate for s in sows), digits=3)) - $(round(maximum(s.discount_rate for s in sows), digits=3))")

Generated 1000 SOWs
Discount rate range: 0.01 - 0.069

Evaluate One Policy

Let’s evaluate a 5-foot elevation policy across all SOWs:

config = HouseElevationConfig()
policy = ElevationPolicy(5.0)

# Run simulation for each SOW
outcomes = [simulate(config, sow, policy) for sow in sows]

# Extract NPV damages
damages = [o.npv_damages for o in outcomes]

println("Policy: Elevate 5 feet")
println("Construction cost: $(round(outcomes[1].construction_cost, digits=3))")
println("Expected NPV damage: $(round(mean(damages), digits=3))")
println("Median NPV damage: $(round(median(damages), digits=3))")
println("Worst case (max): $(round(maximum(damages), digits=3))")

Policy: Elevate 5 feet
Construction cost: 0.722
Expected NPV damage: 0.11
Median NPV damage: 0.005
Worst case (max): 1.809

Understanding Metrics

Different metrics capture different aspects of policy performance:

Metric	Formula	Interpretation
Expected value	`mean(damages)`	Average outcome across all futures
Median	`median(damages)`	Typical outcome (50th percentile)
CVaR 95%	Mean of worst 5%	Average of bad outcomes
Worst case	`maximum(damages)`	Murphy’s Law scenario

# Compute all metrics for our policy
expected_damage = mean(damages)
median_damage = median(damages)
cvar_95 = mean(sort(damages)[end-50+1:end])  # worst 5%
worst_case = maximum(damages)

println("Expected damage: $(round(expected_damage, digits=3))")
println("Median damage: $(round(median_damage, digits=3))")
println("CVaR 95%: $(round(cvar_95, digits=3))")
println("Worst case: $(round(worst_case, digits=3))")

Expected damage: 0.11
Median damage: 0.005
CVaR 95%: 0.864
Worst case: 1.809

Visualizing Uncertainty

The distribution of outcomes shows how much uncertainty remains:

Code

let
    fig = Figure(; size=(700, 400))
    ax = Axis(fig[1, 1];
        xlabel="NPV damages (fraction of house value)",
        ylabel="Frequency",
        title="Damage Distribution (5 ft elevation)")

    hist!(ax, damages; bins=50, color=(:steelblue, 0.7))

    # Mark key statistics
    vlines!(ax, [expected_damage]; color=:red, linewidth=2, label="Expected")
    vlines!(ax, [median_damage]; color=:orange, linewidth=2, label="Median")
    vlines!(ax, [cvar_95]; color=:purple, linewidth=2, linestyle=:dash, label="CVaR 95%")

    axislegend(ax; position=:rt)
    fig
end

Figure 1: Distribution of NPV damages across 1000 SOWs for 5-foot elevation

Comparing Multiple Elevations

Now let’s compare several elevation choices:

elevations = 0:2:14  # 0, 2, 4, ..., 14 feet

# Store results for each elevation
results = Dict{Int,NamedTuple}()

for elev in elevations
    policy = ElevationPolicy(Float64(elev))
    outcomes = [simulate(config, sow, policy) for sow in sows]
    damages = [o.npv_damages for o in outcomes]

    results[elev] = (
        construction_cost = outcomes[1].construction_cost,
        expected_damage = mean(damages),
        median_damage = median(damages),
        cvar_95 = mean(sort(damages)[end-50+1:end]),
        worst_case = maximum(damages),
        all_damages = damages
    )
end

Code

let
    fig = Figure(; size=(700, 400))
    ax = Axis(fig[1, 1];
        xlabel="Elevation (ft)",
        ylabel="Expected total cost (fraction of house value)",
        title="Expected Total Cost by Elevation")

    elevs = collect(elevations)
    total_costs = [results[e].construction_cost + results[e].expected_damage for e in elevs]

    barplot!(ax, elevs, total_costs; color=:steelblue)

    # Mark minimum
    min_idx = argmin(total_costs)
    scatter!(ax, [elevs[min_idx]], [total_costs[min_idx]];
        color=:red, markersize=20, marker=:star5)

    fig
end

Figure 2: Expected total cost (construction + damages) by elevation level

Uncertainty Across Elevations

The box plot shows how uncertainty varies with elevation:

Code

let
    fig = Figure(; size=(800, 500))
    ax = Axis(fig[1, 1];
        xlabel="Elevation (ft)",
        ylabel="NPV damages (fraction of house value)",
        title="Damage Uncertainty by Elevation")

    for elev in elevations
        damages = results[elev].all_damages
        q05 = quantile(damages, 0.05)
        q25 = quantile(damages, 0.25)
        q50 = quantile(damages, 0.50)
        q75 = quantile(damages, 0.75)
        q95 = quantile(damages, 0.95)

        # Whiskers
        lines!(ax, [elev, elev], [q05, q95]; color=:gray, linewidth=1)
        # Box
        poly!(ax, Point2f[(elev-0.6, q25), (elev+0.6, q25), (elev+0.6, q75), (elev-0.6, q75)];
            color=(:steelblue, 0.5), strokecolor=:black, strokewidth=1)
        # Median
        lines!(ax, [elev-0.6, elev+0.6], [q50, q50]; color=:black, linewidth=2)
    end

    fig
end

Figure 3: Distribution of NPV damages by elevation level

Key Insight: Risk vs Expected Value

Higher elevations reduce both expected damage and uncertainty (narrower boxes). This is a form of risk reduction: you pay more upfront to reduce exposure to bad outcomes.

Code

let
    fig = Figure(; size=(600, 500))
    ax = Axis(fig[1, 1];
        xlabel="Expected NPV damage",
        ylabel="CVaR 95% (worst 5% average)",
        title="Risk vs Expected Value by Elevation")

    elevs = collect(elevations)
    expected = [results[e].expected_damage for e in elevs]
    cvar = [results[e].cvar_95 for e in elevs]

    scatter!(ax, expected, cvar; markersize=15, color=:steelblue)

    for (i, elev) in enumerate(elevs)
        text!(ax, expected[i], cvar[i];
            text="$(elev) ft", align=(:left, :bottom), offset=(5, 5))
    end

    fig
end

Figure 4: Trade-off between expected damage and worst-case damage

Summary

We’ve learned to:

Generate SOW ensembles representing uncertain futures
Evaluate a policy across many SOWs using simulate
Compute metrics that summarize performance (expected value, CVaR, worst case)
Compare policies to understand trade-offs

This manual loop works well, but for systematic exploration we want a more structured approach.

Next Steps

In the next section, we’ll use the explore() function to systematically evaluate all combinations of policies and SOWs.