4. Aggregating with Metrics

Summarizing outcomes across scenarios

A single simulation gives us one outcome. But we need to evaluate a policy across many scenarios and summarize the results. This is where metrics come in.

Setup

Type definitions and functions (click to expand)
using SimOptDecisions
using Distributions
using Random
using CairoMakie
using Statistics

# Physics functions
"""
    depth_damage(depth, threshold, saturation)

Return the damaged fraction of house value for a flood `depth` (ft relative to
the floor), using a logistic curve between `threshold` (no damage at or below)
and `saturation` (total damage at or above). All three arguments share one
float type `T`; the result is in `[0, 1]`.
"""
function depth_damage(depth::T, threshold::T, saturation::T) where {T<:AbstractFloat}
    # Clamp outside the transition band.
    if depth <= threshold
        return zero(T)
    elseif depth >= saturation
        return one(T)
    end
    # Logistic centred between the two bounds; steepness 6 spans roughly
    # the 5%-95% range of the sigmoid across the band.
    center = (threshold + saturation) / 2
    k = T(6) / (saturation - threshold)
    return one(T) / (one(T) + exp(-k * (depth - center)))
end

"""
    elevation_cost(Δh, area_ft2, house_value)

Return the cost of elevating a house by `Δh` feet as a fraction of
`house_value`. The cost is a fixed base plus a per-square-foot rate that is
piecewise-linear in `Δh` between tabulated breakpoints; above the last
breakpoint (14 ft) the final rate applies flat. Non-positive `Δh` costs
nothing.
"""
function elevation_cost(Δh::Real, area_ft2::Real, house_value::Real)
    Δh <= 0 && return 0.0
    fixed = 20_745.0
    knots = [0.0, 5.0, 8.5, 12.0, 14.0]
    knot_rates = [80.36, 82.5, 86.25, 103.75, 113.75]
    # Flat extrapolation beyond the last knot; overwritten when Δh falls
    # inside a segment below.
    per_ft2 = knot_rates[end]
    for k in 1:(length(knots) - 1)
        if Δh <= knots[k + 1]
            frac = (Δh - knots[k]) / (knots[k + 1] - knots[k])
            per_ft2 = knot_rates[k] + frac * (knot_rates[k + 1] - knot_rates[k])
            break
        end
    end
    return (fixed + area_ft2 * per_ft2) / house_value
end
# Convenience method with the tutorial's default house (1500 ft², $200k).
elevation_cost(Δh::Real) = elevation_cost(Δh, 1500.0, 200_000.0)

# Types
# Fixed (non-uncertain) problem parameters. All monetary outcomes elsewhere
# are expressed as fractions of `house_value`.
Base.@kwdef struct HouseConfig{T<:AbstractFloat} <: AbstractConfig
    horizon::Int = 70          # number of simulated years (timesteps)
    house_height::T = 4.0      # floor height above the water-level datum (ft) — presumably grade; confirm
    house_area_ft2::T = 1500.0 # footprint used in the elevation-cost model (ft²)
    house_value::T = 200_000.0 # value used to normalize all costs (USD)
end

# One sampled future: an annual water-level series plus scalar uncertainties
# for the depth-damage curve and discounting.
@scenariodef HouseScenario begin
    @timeseries water_levels     # annual max water level (ft), one entry per year
    @continuous dd_threshold     # flood depth below which no damage occurs (ft)
    @continuous dd_saturation    # flood depth at which damage reaches 100% (ft)
    @continuous discount_rate    # annual rate used to discount future damages
end

"""
    sample_scenario(rng, horizon)

Draw one `HouseScenario`. GEV parameters for the annual-maximum surge are
themselves sampled (parameter uncertainty), then `horizon` yearly water
levels are drawn from the resulting distribution. Depth-damage curve
parameters and the discount rate are sampled independently.
"""
function sample_scenario(rng::AbstractRNG, horizon::Int)
    # Uncertain GEV parameters: location, scale (>= 0.3), shape in [-0.2, 0.5].
    location = rand(rng, Normal(2.8, 0.3))
    scale = rand(rng, truncated(Normal(1.0, 0.15); lower=0.3))
    shape = rand(rng, truncated(Normal(0.15, 0.05); lower=-0.2, upper=0.5))
    annual_max = GeneralizedExtremeValue(location, scale, shape)

    # One draw per year of the horizon (drawn before the scalar parameters
    # so the RNG stream matches element-by-element sampling).
    water_levels = map(_ -> rand(rng, annual_max), 1:horizon)

    return HouseScenario(
        water_levels = water_levels,
        dd_threshold = rand(rng, Normal(0.0, 0.25)),
        dd_saturation = rand(rng, Normal(8.0, 0.5)),
        discount_rate = rand(rng, truncated(Normal(0.03, 0.015); lower=0.01, upper=0.07)),
    )
end

# Simulation state: the only thing that evolves is how high the house sits.
struct HouseState{T<:AbstractFloat} <: AbstractState
    elevation_ft::T  # current elevation above the original floor level (ft)
end

# Action emitted by the policy each timestep; only the first timestep's
# action is actually applied (see run_timestep).
struct ElevationAction{T<:AbstractFloat} <: AbstractAction
    elevation_ft::T  # feet of elevation to add
end

# Decision variable: a single elevation height, bounded to [0, 14] ft
# (matching the cost table's breakpoint range).
@policydef ElevationPolicy begin
    @continuous elevation_ft 0.0 14.0
end

# Functions
# SimOptDecisions interface hooks.
# Every simulation starts from an unelevated house (0 ft).
SimOptDecisions.initialize(::HouseConfig, ::HouseScenario, ::AbstractRNG) = HouseState(0.0)
# One timestep per year of the planning horizon.
SimOptDecisions.time_axis(config::HouseConfig, ::HouseScenario) = 1:(config.horizon)
# Static policy: the same action is returned every step; run_timestep only
# applies it at the first step.
SimOptDecisions.get_action(policy::ElevationPolicy, ::HouseState, ::TimeStep, ::HouseScenario) = ElevationAction(value(policy.elevation_ft))

"""
    run_timestep(state, action, t, config, scenario, rng)

Advance the house one year. The elevation action is applied — and its
construction cost paid — only at the first timestep; afterwards the
elevation is fixed. Flood damage for the year is the depth-damage fraction
of the water level relative to the (possibly raised) floor.

Returns `(new_state, (construction_cost, damage_fraction))`, both costs as
fractions of house value.
"""
function SimOptDecisions.run_timestep(
    state::HouseState, action::ElevationAction, t::TimeStep,
    config::HouseConfig, scenario::HouseScenario, rng::AbstractRNG
)
    # One-shot decision: build (and pay) only on the first step.
    first_step = is_first(t)
    elevated = first_step ? state.elevation_ft + action.elevation_ft : state.elevation_ft
    cost = first_step ?
        elevation_cost(action.elevation_ft, config.house_area_ft2, config.house_value) : 0.0

    # Depth of water above the floor this year (negative means no flooding).
    depth_above_floor = scenario.water_levels[t] - (config.house_height + elevated)
    frac_damaged = depth_damage(depth_above_floor, value(scenario.dd_threshold), value(scenario.dd_saturation))

    return (HouseState(elevated), (construction_cost=cost, damage_fraction=frac_damaged))
end

"""
    compute_outcome(step_records, config, scenario)

Aggregate per-timestep records into the simulation's outcome: the up-front
construction cost (incurred at t = 1 only), the net-present-value sum of
yearly damage fractions under the scenario's discount rate, and their total.
All values are fractions of house value.
"""
function SimOptDecisions.compute_outcome(
    step_records::Vector, ::HouseConfig, scenario::HouseScenario
)
    rate = value(scenario.discount_rate)  # invariant across timesteps
    upfront = first(step_records).construction_cost
    damages = sum(
        rec.damage_fraction * discount_factor(rate, t) for (t, rec) in pairs(step_records)
    )
    return (construction_cost=upfront, npv_damages=damages, total_cost=upfront + damages)
end

From Outcomes to Metrics

An outcome is the result of one simulation:

config = HouseConfig()
scenario = sample_scenario(Random.Xoshiro(42), config.horizon)
policy = ElevationPolicy(elevation_ft=5.0)

outcome = simulate(config, scenario, policy)
outcome
(construction_cost = 0.722475, npv_damages = 0.0, total_cost = 0.722475)

A metric summarizes outcomes across many scenarios. Common metrics include:

| Metric | What it answers |
|---|---|
| Expected value | What's the average outcome? |
| Variance | How much does the outcome vary? |
| Probability | How likely is an event (e.g., damage > 50%)? |
| Quantile | What's the worst-case at a given confidence level? |

Evaluating a Policy Across Scenarios

Let’s run one policy against many scenarios:

n_scenarios = 500
rng = Random.Xoshiro(123)
scenarios = [sample_scenario(rng, config.horizon) for _ in 1:n_scenarios]

# Evaluate one policy across all scenarios
policy = ElevationPolicy(elevation_ft=5.0)
outcomes = [simulate(config, s, policy) for s in scenarios]

# Extract total costs
total_costs = [o.total_cost for o in outcomes]

println("Mean total cost: $(round(mean(total_costs), digits=3))")
println("Std deviation: $(round(std(total_costs), digits=3))")
println("5th percentile: $(round(quantile(total_costs, 0.05), digits=3))")
println("95th percentile: $(round(quantile(total_costs, 0.95), digits=3))")
Mean total cost: 0.819
Std deviation: 0.199
5th percentile: 0.722
95th percentile: 1.222
Code
# Histogram of total costs across all scenarios for the 5 ft policy,
# with vertical markers for the mean and the 95th percentile.
let
    fig = Figure(; size=(700, 400))
    ax = Axis(fig[1, 1];
        xlabel="Total cost (fraction of house value)",
        ylabel="Count",
        title="Outcome Distribution (5ft elevation, $(n_scenarios) scenarios)")

    hist!(ax, total_costs; bins=30, color=:steelblue)

    # Risk-neutral summary (mean) vs. tail-risk summary (95th percentile).
    vlines!(ax, [mean(total_costs)]; color=:red, linewidth=2, label="Mean")
    vlines!(ax, [quantile(total_costs, 0.95)]; color=:orange, linewidth=2, linestyle=:dash, label="95th percentile")

    axislegend(ax; position=:rt)
    fig
end
Figure 1: Distribution of total costs across scenarios (5ft elevation)

The calculate_metrics Function

For optimization, we need a function that takes outcomes and returns metrics. This is calculate_metrics:

"""
    calculate_metrics(outcomes)

Summarize a collection of simulation outcomes into named metrics over
`total_cost`: its mean (`expected_cost`), variance (`cost_variance`), and
95th percentile (`worst_5pct`, the cost exceeded only in the worst 5% of
scenarios).
"""
function calculate_metrics(outcomes)
    costs = map(o -> o.total_cost, outcomes)
    expected = mean(costs)
    spread = var(costs)
    tail = quantile(costs, 0.95)  # 95th percentile = worst 5% of scenarios
    return (expected_cost = expected, cost_variance = spread, worst_5pct = tail)
end
metrics = calculate_metrics(outcomes)
metrics
(expected_cost = 0.8193046710051736, cost_variance = 0.039791184167260164, worst_5pct = 1.2222963231613766)

Comparing Policies with Metrics

Now we can compare policies using consistent metrics:

Code
# Sweep integer elevations 0-14 ft, evaluate each policy on the same
# scenario set, and plot expected cost vs. worst-5% cost — each point is
# one policy, labeled with its elevation.
let
    elevations = 0:14
    expected_costs = Float64[]
    worst_5pcts = Float64[]

    for elev in elevations
        policy = ElevationPolicy(elevation_ft=Float64(elev))
        # Reuse the shared `scenarios` so policies are compared on identical futures.
        outcomes = [simulate(config, s, policy) for s in scenarios]
        metrics = calculate_metrics(outcomes)
        push!(expected_costs, metrics.expected_cost)
        push!(worst_5pcts, metrics.worst_5pct)
    end

    fig = Figure(; size=(700, 500))
    ax = Axis(fig[1, 1];
        xlabel="Expected cost (fraction of house value)",
        ylabel="Worst 5% cost (fraction of house value)",
        title="Trade-off: Expected vs Worst-Case Cost")

    scatter!(ax, expected_costs, worst_5pcts; markersize=15)

    # Annotate each point with its elevation in feet.
    for (i, elev) in enumerate(elevations)
        text!(ax, expected_costs[i], worst_5pcts[i];
            text=string(elev), align=(:left, :bottom), offset=(5, 5))
    end

    fig
end
Figure 2: Expected cost vs worst-case cost for different elevations

This reveals a Pareto frontier: among the non-dominated policies, none can improve on expected cost without worsening worst-case cost (or vice versa).

Risk Attitudes and Metric Choice

The “right” metric depends on the decision-maker’s values and the institutional context. In decision making under deep uncertainty, the choice of metric is itself a modeling decision with real consequences (Herman et al. 2015):

| Risk Attitude | Metric | Favors |
|---|---|---|
| Risk-neutral | Expected cost | Policies that perform well on average across scenarios |
| Risk-averse | 95th percentile or CVaR | Policies that limit worst-case losses |
| Regret-averse | Maximum regret | Policies whose worst-case deviation from the best possible choice is small |
| Satisficing | Probability of cost < threshold | Policies most likely to meet a minimum performance standard |
Tip: Robustness vs. Optimality

In classical optimization, we seek the policy that minimizes (or maximizes) some objective. In DMDU, we often seek robust policies—ones that perform acceptably well across a wide range of futures, even if they aren’t optimal in any single scenario.

A policy that minimizes expected cost might perform terribly in the worst 5% of scenarios. A slightly more expensive policy might avoid catastrophic outcomes entirely. The Pareto frontier in the plot above captures exactly this trade-off.

The calculate_metrics function lets you define whatever aggregations are relevant for your problem. Different metrics lead to different “optimal” policies—which is why exploring the full outcome distribution matters.

Built-in Metric Types

SimOptDecisions provides declarative metric types for common cases:

# These are equivalent to what we computed manually:
ExpectedValue(:total_cost)           # mean(outcomes.total_cost)
Variance(:total_cost)                # var(outcomes.total_cost)
Quantile(:total_cost, 0.95)          # quantile(outcomes.total_cost, 0.95)
Probability(:total_cost, >, 0.5)     # fraction where total_cost > 0.5

These are used with compute_metrics(metric, outcomes) for convenience, but for optimization you’ll typically write a custom calculate_metrics function.

Summary

  • Outcome: Result of one simulate() call
  • Metric: Summary statistic across many outcomes
  • calculate_metrics(outcomes): Your function that aggregates outcomes into named metrics

The choice of metrics reflects your values and risk attitude. Different metrics lead to different “optimal” policies.

Tip: Coming Up: explore()

Manually looping over scenarios works, but explore() does this automatically with:

  • Parallel execution via Executors
  • Automatic result organization as YAXArrays
  • Common Random Numbers for variance reduction

In the next section, we’ll use explore() to systematically run all policy×scenario combinations and analyze the results.

References

Herman, Jonathan D., Patrick M. Reed, Harrison B. Zeff, and Gregory W. Characklis. 2015. “How Should Robustness Be Defined for Water Systems Planning Under Change?” Journal of Water Resources Planning and Management 141 (10): 04015012. https://doi.org/10.1061/(asce)wr.1943-5452.0000509.