# ─── Libraries ────────────────────────────────────────────────────────────────
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as mticker
import seaborn as sns
from matplotlib.gridspec import GridSpec
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    roc_auc_score, roc_curve,
    confusion_matrix, ConfusionMatrixDisplay, brier_score_loss,
)
from sklearn.calibration import calibration_curve
import shap
import sklearn

# ─── Visual style ─────────────────────────────────────────────────────────────
C_GOOD  = "#2ecc71"    # green  – creditworthy
C_BAD   = "#e74c3c"    # red    – default
C_MALE  = "#3498db"    # blue
C_FEM   = "#e67e22"    # orange
C_DARK  = "#1a3a5c"    # navy
C_GREY  = "#95a5a6"    # grey

sns.set_theme(style="whitegrid", font_scale=1.05)
plt.rcParams.update({
    "figure.dpi": 130,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "axes.titlesize": 13,
    "axes.labelsize": 11,
    "figure.facecolor": "white",
    "axes.facecolor": "white",
})

RANDOM_STATE = 42
print(f"✓ Environment ready | pandas {pd.__version__} "
      f"| sklearn {sklearn.__version__} | shap {shap.__version__}")

✓ Environment ready | pandas 3.0.1 | sklearn 1.8.0 | shap 0.50.0


# ─── Load ─────────────────────────────────────────────────────────────────────
df_raw = pd.read_csv("german_credit_data.csv")
df = df_raw.copy()

# ── Derived helper columns ────────────────────────────────────────────────────
df["default"] = (df["target"] == "bad").astype(int)

df["gender"] = (
    df["status_and_sex"]
    .str.contains("female", case=False)
    .map({True: "Female", False: "Male"})
)

df["age_group"] = pd.cut(
    df["age"],
    bins=[17, 25, 35, 50, 100],
    labels=["18-25", "26-35", "36-50", "51+"],
    ordered=True,
)

# ── Summary ───────────────────────────────────────────────────────────────────
n_good = (df["target"] == "good").sum()
n_bad  = (df["target"] == "bad").sum()
print(f"Rows x Columns   : {df.shape[0]:,} x {df.shape[1]}")
print(f"Target split     : {n_good} Good ({n_good/len(df):.0%})  |  "
      f"{n_bad} Bad ({n_bad/len(df):.0%})")
print(f"Missing values   : {df.isnull().sum().sum()}")
print(f"Age range        : {df['age'].min()} - {df['age'].max()} years")
print(f"Credit range     : DM {df['credit_amount'].min():,} - "
      f"DM {df['credit_amount'].max():,}")
df.head(4)

Rows x Columns   : 1,000 x 24
Target split     : 700 Good (70%)  |  300 Bad (30%)
Missing values   : 0
Age range        : 19 - 75 years
Credit range     : DM 250 - DM 18,424


# ─── 1.1  Portfolio overview ──────────────────────────────────────────────────
fig = plt.figure(figsize=(17, 5))
gs  = GridSpec(1, 3, figure=fig, wspace=0.38)

# Panel A — Target split
ax0 = fig.add_subplot(gs[0])
counts = df["target"].value_counts()
wedge_props = dict(width=0.52, edgecolor="white", linewidth=2)
ax0.pie(
    counts,
    labels=[f"{l.capitalize()}\n{v}" for l, v in counts.items()],
    colors=[C_GOOD, C_BAD],
    autopct="%1.0f%%",
    startangle=90,
    pctdistance=0.75,
    wedgeprops=wedge_props,
    textprops={"fontsize": 11},
)
ax0.set_title("Portfolio Credit Quality", fontweight="bold")
ax0.add_patch(plt.Circle((0, 0), 0.25, color="white"))

# Panel B — Credit amount distribution by outcome
ax1 = fig.add_subplot(gs[1])
for outcome, colour in [("good", C_GOOD), ("bad", C_BAD)]:
    subset = df.loc[df["target"] == outcome, "credit_amount"]
    ax1.hist(subset, bins=25, alpha=0.60, color=colour,
             label=outcome.capitalize(), edgecolor="white")
ax1.set_xlabel("Credit Amount (DM)")
ax1.set_ylabel("Applications")
ax1.set_title("Credit Amount by Outcome", fontweight="bold")
ax1.xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"{int(x):,}"))
ax1.legend()

# Panel C — Default rate by loan purpose
ax2 = fig.add_subplot(gs[2])
purpose_dr = (
    df.groupby("purpose")["default"]
    .agg(["mean", "count"])
    .rename(columns={"mean": "dr", "count": "n"})
    .sort_values("dr")
)
bar_colours = [
    C_BAD if r > 0.35 else (C_GOOD if r < 0.25 else "#f39c12")
    for r in purpose_dr["dr"]
]
bars = ax2.barh(purpose_dr.index, purpose_dr["dr"],
                color=bar_colours, edgecolor="white", height=0.65)
ax2.axvline(df["default"].mean(), ls="--", color=C_DARK,
            alpha=0.5, lw=1.5, label=f"Portfolio avg ({df['default'].mean():.0%})")
ax2.set_xlabel("Default Rate")
ax2.set_title("Default Rate by Loan Purpose", fontweight="bold")
ax2.xaxis.set_major_formatter(mticker.PercentFormatter(1.0))
ax2.legend(fontsize=9)
for bar, (_, row) in zip(bars, purpose_dr.iterrows()):
    ax2.text(bar.get_width() + 0.005, bar.get_y() + bar.get_height() / 2,
             f"n={int(row['n'])}", va="center", fontsize=8, color=C_GREY)

plt.suptitle("Module 1 — Portfolio Snapshot",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.savefig("fig_01_portfolio_snapshot.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 1.2  Demographic risk profile ───────────────────────────────────────────
fig, axes = plt.subplots(1, 3, figsize=(17, 4.5))

# Age distribution
ax = axes[0]
for outcome, colour in [("good", C_GOOD), ("bad", C_BAD)]:
    subset = df.loc[df["target"] == outcome, "age"]
    ax.hist(subset, bins=20, alpha=0.60, color=colour,
            label=outcome.capitalize(), edgecolor="white")
ax.set_xlabel("Age (years)")
ax.set_ylabel("Count")
ax.set_title("Age Distribution by Outcome", fontweight="bold")
ax.legend()

# Default rate by age group
ax = axes[1]
age_raw = (
    df.groupby("age_group", observed=True)["default"]
    .agg(["mean", "count"])
    .reset_index()
    .rename(columns={"mean": "dr", "count": "n"})
)
bar_colours = [
    C_BAD if r > 0.35 else (C_GOOD if r < 0.25 else "#f39c12")
    for r in age_raw["dr"]
]
bars = ax.bar(age_raw["age_group"].astype(str), age_raw["dr"],
              color=bar_colours, edgecolor="white", width=0.55)
ax.set_xlabel("Age Group")
ax.set_ylabel("Default Rate")
ax.set_title("Default Rate by Age Group", fontweight="bold")
ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
for bar, row in zip(bars, age_raw.itertuples()):
    ax.text(bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.008,
            f"{row.dr:.0%}  (n={row.n})",
            ha="center", va="bottom", fontsize=9)

# Default rate by gender
ax = axes[2]
gender_raw = (
    df.groupby("gender")["default"]
    .agg(["mean", "count"])
    .reset_index()
    .rename(columns={"mean": "dr", "count": "n"})
)
bar_colours_g = [C_FEM, C_MALE]
bars = ax.bar(gender_raw["gender"], gender_raw["dr"],
              color=bar_colours_g, edgecolor="white", width=0.45)
ax.set_xlabel("Gender")
ax.set_ylabel("Default Rate")
ax.set_title("Default Rate by Gender", fontweight="bold")
ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
for bar, row in zip(bars, gender_raw.itertuples()):
    ax.text(bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.008,
            f"{row.dr:.1%}  (n={row.n})",
            ha="center", va="bottom", fontsize=11, fontweight="bold")

gap = (
    gender_raw.loc[gender_raw["gender"] == "Female", "dr"].values[0]
    - gender_raw.loc[gender_raw["gender"] == "Male", "dr"].values[0]
)
ax.annotate(
    f"Gap = {gap:.1%}",
    xy=(0.5, 0.88), xycoords="axes fraction",
    ha="center", fontsize=11, color=C_BAD, fontweight="bold",
    bbox=dict(boxstyle="round,pad=0.3", facecolor="#fdecea", edgecolor=C_BAD),
)

plt.suptitle("Module 1 — Demographic Risk Profile",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.tight_layout()
plt.savefig("fig_02_demographics.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 2.1  Feature engineering ─────────────────────────────────────────────────
# Exclude derived / helper columns; keep the original 20 features
EXCLUDE = {"target", "default", "gender", "age_group", "status_and_sex"}
FEATURES = [c for c in df.columns if c not in EXCLUDE]

num_cols = df[FEATURES].select_dtypes(include="number").columns.tolist()
cat_cols = df[FEATURES].select_dtypes(include="object").columns.tolist()

print(f"Numeric features  ({len(num_cols)}): {num_cols}")
print(f"Categorical feat. ({len(cat_cols)}): {cat_cols}")

X = df[FEATURES].copy()
y = (df["target"] == "good").astype(int)   # 1 = Good (creditworthy)

preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(drop="first", sparse_output=False,
                          handle_unknown="ignore"), cat_cols),
], remainder="drop")

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=RANDOM_STATE
)

X_train = preprocessor.fit_transform(X_train_raw)
X_test  = preprocessor.transform(X_test_raw)

cat_names = (preprocessor
             .named_transformers_["cat"]
             .get_feature_names_out(cat_cols))
feature_names = np.array(num_cols + list(cat_names))

print(f"\nTrain : {X_train.shape[0]} samples")
print(f"Test  : {X_test.shape[0]} samples")
print(f"Features after encoding: {X_train.shape[1]}")

Numeric features  (7): ['month_duration', 'credit_amount', 'payment_to_income_ratio', 'residence_since', 'age', 'n_credits', 'n_guarantors']
Categorical feat. (12): ['status_account', 'credit_history', 'purpose', 'status_savings', 'years_employment', 'secondary_obligor', 'collateral', 'other_installment_plans', 'housing', 'job', 'telephone', 'is_foreign_worker']

Train : 800 samples
Test  : 200 samples
Features after encoding: 45


# ─── 2.2  Model training ──────────────────────────────────────────────────────
lr_model = LogisticRegression(
    max_iter=1000, class_weight="balanced", random_state=RANDOM_STATE
)
lr_model.fit(X_train, y_train)
lr_proba = lr_model.predict_proba(X_test)[:, 1]
lr_pred  = lr_model.predict(X_test)

gb_model = GradientBoostingClassifier(
    n_estimators=300, max_depth=4, learning_rate=0.04,
    subsample=0.8, min_samples_leaf=15, random_state=RANDOM_STATE,
)
gb_model.fit(X_train, y_train)
gb_proba = gb_model.predict_proba(X_test)[:, 1]
gb_pred  = gb_model.predict(X_test)

# ── Helper metrics ────────────────────────────────────────────────────────────
def ks_stat(y_true, y_prob):
    from sklearn.metrics import roc_curve
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    return float(np.max(np.abs(tpr - fpr)))

def gini(y_true, y_prob):
    return 2 * roc_auc_score(y_true, y_prob) - 1

models = {"Logistic Regression": (lr_proba, lr_pred),
          "Gradient Boosting":   (gb_proba, gb_pred)}

print(f"{'Model':<25} {'AUC':>7} {'Gini':>7} {'KS':>7} {'Brier':>7}")
print("─" * 57)
for name, (proba, pred) in models.items():
    print(
        f"{name:<25}"
        f"{roc_auc_score(y_test, proba):>7.4f}"
        f"{gini(y_test, proba):>7.4f}"
        f"{ks_stat(y_test, proba):>7.4f}"
        f"{brier_score_loss(y_test, proba):>7.4f}"
    )

Model                         AUC    Gini      KS   Brier
─────────────────────────────────────────────────────────
Logistic Regression       0.7469 0.4938 0.4667 0.2181
Gradient Boosting         0.7582 0.5164 0.4738 0.1788


# ─── 2.3  ROC curves & confusion matrices ─────────────────────────────────────
fig, axes = plt.subplots(1, 3, figsize=(17, 5))

# ROC curves
ax = axes[0]
for (name, (proba, _)), colour in zip(models.items(), [C_DARK, C_BAD]):
    fpr, tpr, _ = roc_curve(y_test, proba)
    auc_val = roc_auc_score(y_test, proba)
    ax.plot(fpr, tpr, lw=2, color=colour,
            label=f"{name}  (AUC = {auc_val:.3f})")
ax.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5, label="Random")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curves", fontweight="bold")
ax.legend(fontsize=9)

# Confusion matrix — Logistic Regression
ax = axes[1]
cm = confusion_matrix(y_test, lr_pred)
ConfusionMatrixDisplay(cm, display_labels=["Bad (0)", "Good (1)"]).plot(
    ax=ax, colorbar=False, cmap="Blues"
)
ax.set_title("Confusion Matrix\nLogistic Regression", fontweight="bold")

# Confusion matrix — Gradient Boosting
ax = axes[2]
cm = confusion_matrix(y_test, gb_pred)
ConfusionMatrixDisplay(cm, display_labels=["Bad (0)", "Good (1)"]).plot(
    ax=ax, colorbar=False, cmap="Reds"
)
ax.set_title("Confusion Matrix\nGradient Boosting", fontweight="bold")

plt.suptitle("Module 2 — Model Evaluation",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.tight_layout()
plt.savefig("fig_03_model_evaluation.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 2.4  Calibration curve ───────────────────────────────────────────────────
# Calibration: when the model says 70% probability of good credit,
# are ~70% of those applicants actually good? Critical for IFRS 9 provisioning.
fig, ax = plt.subplots(figsize=(7, 5.5))

for (name, (proba, _)), colour in zip(models.items(), [C_DARK, C_BAD]):
    frac_pos, mean_pred = calibration_curve(y_test, proba, n_bins=8)
    ax.plot(mean_pred, frac_pos, "s-", lw=2, color=colour,
            markersize=6, label=name)

ax.plot([0, 1], [0, 1], "k--", lw=1.5, label="Perfect calibration")
ax.fill_between([0, 1], [0, 1], alpha=0.05, color="black")
ax.set_xlabel("Mean Predicted Probability (Good Credit)")
ax.set_ylabel("Fraction of Actual Good Outcomes")
ax.set_title("Calibration Curve\nAre Model Probabilities Reliable?",
             fontweight="bold")
ax.legend()
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.tight_layout()
plt.savefig("fig_04_calibration.png", bbox_inches="tight", dpi=150)
plt.show()

print("Note: A well-calibrated model (close to the diagonal) is essential")
print("for IFRS 9 expected-loss provisioning and credit pricing.")

Note: A well-calibrated model (close to the diagonal) is essential
for IFRS 9 expected-loss provisioning and credit pricing.


# ─── 3.1  Align demographic info with test set ────────────────────────────────
df_test = (
    df.iloc[X_test_raw.index]
    .copy()
    .reset_index(drop=True)
)
df_test["y_true"]  = y_test.values
df_test["y_pred"]  = gb_pred
df_test["y_proba"] = gb_proba

def fairness_metrics(df_sub, group_col):
    rows = []
    for g in df_sub[group_col].dropna().unique():
        sub = df_sub[df_sub[group_col] == g]
        tp = ((sub["y_pred"] == 1) & (sub["y_true"] == 1)).sum()
        fp = ((sub["y_pred"] == 1) & (sub["y_true"] == 0)).sum()
        fn = ((sub["y_pred"] == 0) & (sub["y_true"] == 1)).sum()
        tn = ((sub["y_pred"] == 0) & (sub["y_true"] == 0)).sum()
        n  = len(sub)
        rows.append({
            "Group":           str(g),
            "N":               n,
            "Approval Rate":   (tp + fp) / n,
            "TPR":             tp / (tp + fn) if (tp + fn) > 0 else np.nan,
            "Precision":       tp / (tp + fp) if (tp + fp) > 0 else np.nan,
            "FPR":             fp / (fp + tn) if (fp + tn) > 0 else np.nan,
            "Avg Score":       sub["y_proba"].mean(),
        })
    return pd.DataFrame(rows).set_index("Group")

gender_m = fairness_metrics(df_test, "gender")
print("=== Gender Fairness Metrics ===")
print(gender_m.round(3).to_string())

male_ar   = gender_m.loc["Male",   "Approval Rate"]
female_ar = gender_m.loc["Female", "Approval Rate"]
dir_g = female_ar / male_ar
spd_g = female_ar - male_ar
eod_g = gender_m.loc["Female", "TPR"] - gender_m.loc["Male", "TPR"]
ppd_g = gender_m.loc["Female", "Precision"] - gender_m.loc["Male", "Precision"]

print(f"\nDisparate Impact Ratio  : {dir_g:.3f}"
      f"  {'FAIL — below 0.80' if dir_g < 0.8 else 'PASS'}")
print(f"Statistical Parity Diff : {spd_g:+.3f}")
print(f"Equal Opportunity Diff  : {eod_g:+.3f}")
print(f"Predictive Parity Diff  : {ppd_g:+.3f}")

=== Gender Fairness Metrics ===
          N  Approval Rate   TPR  Precision   FPR  Avg Score
Group                                                       
Male    140          0.757  0.84      0.792  0.55      0.691
Female   60          0.733  0.85      0.773  0.50      0.688

Disparate Impact Ratio  : 0.969  PASS
Statistical Parity Diff : -0.024
Equal Opportunity Diff  : +0.010
Predictive Parity Diff  : -0.020


# ─── 3.2  Gender fairness visualisation ──────────────────────────────────────
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
gm = gender_m.reset_index()

metric_cfg = [
    ("Approval Rate", "Model Approval Rate by Gender",   "Approval Rate"),
    ("TPR",           "Equal Opportunity (TPR) by Gender", "True Positive Rate"),
    ("Precision",     "Predictive Parity by Gender",     "Precision"),
]
for ax, (col, title, ylabel) in zip(axes, metric_cfg):
    colours = [C_FEM if g == "Female" else C_MALE for g in gm["Group"]]
    bars = ax.bar(gm["Group"], gm[col], color=colours,
                  edgecolor="white", width=0.45)
    ax.set_title(title, fontweight="bold")
    ax.set_ylabel(ylabel)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
    ax.set_ylim(0, 1.05)
    for bar, row in zip(bars, gm.iterrows()):
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar.get_height() + 0.012,
                f"{row[1][col]:.1%}",
                ha="center", va="bottom", fontsize=12, fontweight="bold")

# 80% line on approval-rate panel
threshold_line = male_ar * 0.80
axes[0].axhline(threshold_line, ls="--", color=C_BAD, lw=1.8, alpha=0.75)
axes[0].text(1.55, threshold_line + 0.01,
             "80% rule\n(EU AI Act)", fontsize=8, color=C_BAD)
female_patch = mpatches.Patch(color=C_FEM, label="Female")
male_patch   = mpatches.Patch(color=C_MALE, label="Male")
axes[0].legend(handles=[female_patch, male_patch], fontsize=9)

plt.suptitle("Module 3 — Gender Fairness Audit",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.tight_layout()
plt.savefig("fig_05_gender_fairness.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 3.3  Age-group fairness ──────────────────────────────────────────────────
age_m = fairness_metrics(df_test, "age_group")
# Sort by natural age order
age_order = ["18-25", "26-35", "36-50", "51+"]
age_m = age_m.reindex([g for g in age_order if g in age_m.index])

print("=== Age Group Fairness Metrics ===")
print(age_m.round(3).to_string())

old_ar   = age_m.loc["51+",   "Approval Rate"]
young_ar = age_m.loc["18-25", "Approval Rate"]
dir_a    = young_ar / old_ar
spd_a    = young_ar - old_ar
eod_a    = age_m.loc["18-25", "TPR"] - age_m.loc["51+", "TPR"]

print(f"\nDisparate Impact Ratio (18-25 vs 51+) : {dir_a:.3f}"
      f"  {'FAIL — below 0.80' if dir_a < 0.8 else 'PASS'}")
print(f"Statistical Parity Diff               : {spd_a:+.3f}")
print(f"Equal Opportunity Diff                : {eod_a:+.3f}")

=== Age Group Fairness Metrics ===
        N  Approval Rate    TPR  Precision    FPR  Avg Score
Group                                                       
18-25  35          0.686  0.737      0.583  0.625      0.600
26-35  76          0.763  0.828      0.828  0.556      0.700
36-50  64          0.750  0.894      0.875  0.353      0.709
51+    25          0.800  0.875      0.700  0.667      0.735

Disparate Impact Ratio (18-25 vs 51+) : 0.857  PASS
Statistical Parity Diff               : -0.114
Equal Opportunity Diff                : -0.138


# ─── 3.4  Age fairness visualisation ─────────────────────────────────────────
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
am = age_m.reset_index()
palette = sns.color_palette("Blues_d", len(am))[::-1]

for ax, col, title, ylabel in [
    (axes[0], "Approval Rate", "Approval Rate by Age Group",    "Approval Rate"),
    (axes[1], "TPR",           "Equal Opportunity by Age Group", "True Positive Rate"),
]:
    bars = ax.bar(am["Group"], am[col], color=palette,
                  edgecolor="white", width=0.55)
    ax.set_title(title, fontweight="bold")
    ax.set_xlabel("Age Group")
    ax.set_ylabel(ylabel)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
    ax.set_ylim(0, 1.1)
    for bar, (_, row) in zip(bars, am.iterrows()):
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar.get_height() + 0.012,
                f"{row[col]:.1%}  (n={row['N']})",
                ha="center", va="bottom", fontsize=9, fontweight="bold")

# 80% reference line
axes[0].axhline(old_ar * 0.80, ls="--", color=C_BAD,
                lw=1.8, alpha=0.75, label=f"80% of oldest group ({old_ar*0.80:.1%})")
axes[0].legend(fontsize=9)

plt.suptitle("Module 3 — Age Fairness Audit",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.tight_layout()
plt.savefig("fig_06_age_fairness.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 3.5  Consolidated fairness scorecard ────────────────────────────────────
def flag(val, threshold, direction="below"):
    fail = (val < threshold) if direction == "below" else (abs(val) > threshold)
    return "FAIL" if fail else "PASS"

hdr = f"{'Metric':<33} {'Gender':>10} {'Status':>8}  {'Age (18-25 vs 51+)':>20} {'Status':>8}"
print("=" * 85)
print("  FAIRNESS SCORECARD — Gradient Boosting Model")
print("=" * 85)
print(hdr)
print("─" * 85)
rows = [
    ("Disparate Impact Ratio (DIR)",    dir_g, 0.80, "below",  dir_a, 0.80, "below"),
    ("Statistical Parity Diff (SPD)",   spd_g, 0.10, "above",  spd_a, 0.10, "above"),
    ("Equal Opportunity Diff  (EOD)",   eod_g, 0.10, "above",  eod_a, 0.10, "above"),
    ("Predictive Parity Diff  (PPD)",   ppd_g, 0.10, "above",  None,  None,  None),
]
for metric, gval, gthr, gdir, aval, athr, adir in rows:
    gflag = flag(gval, gthr, gdir)
    aflag = flag(aval, athr, adir) if aval is not None else "n/a"
    adisp = f"{aval:+.3f}" if aval is not None else "  n/a"
    print(f"  {metric:<31} {gval:>+10.3f} {gflag:>8}  {adisp:>22} {aflag:>8}")
print("=" * 85)
print("  Thresholds: DIR < 0.80  |  |SPD|, |EOD|, |PPD| > 0.10  → FAIL")
print("=" * 85)

=====================================================================================
  FAIRNESS SCORECARD — Gradient Boosting Model
=====================================================================================
Metric                                Gender   Status    Age (18-25 vs 51+)   Status
─────────────────────────────────────────────────────────────────────────────────────
  Disparate Impact Ratio (DIR)        +0.969     PASS                  +0.857     PASS
  Statistical Parity Diff (SPD)       -0.024     PASS                  -0.114     FAIL
  Equal Opportunity Diff  (EOD)       +0.010     PASS                  -0.138     FAIL
  Predictive Parity Diff  (PPD)       -0.020     PASS                     n/a      n/a
=====================================================================================
  Thresholds: DIR < 0.80  |  |SPD|, |EOD|, |PPD| > 0.10  → FAIL
=====================================================================================


# ─── 4.1  False-negative revenue loss by gender ───────────────────────────────
ANNUAL_RATE = 0.08   # assumed average interest rate

# False Negatives = creditworthy customers wrongly rejected
fn_mask = (df_test["y_pred"] == 0) & (df_test["y_true"] == 1)
df_fn = df_test[fn_mask].copy()

# Estimated interest revenue = principal × rate × (duration in years)
df_fn["est_revenue"] = (
    df_fn["credit_amount"] * ANNUAL_RATE * df_fn["month_duration"] / 12
)

fn_by_gender = (
    df_fn.groupby("gender")["est_revenue"]
    .agg(n_fn="count", total_loss="sum", avg_loss="mean")
)
fn_by_gender["fn_rate"] = (
    df_fn.groupby("gender").size()
    / df_test.groupby("gender").size()
)

print("=== Revenue Lost to False Negatives — Test Set ===")
print(fn_by_gender.round(0).to_string())

# Scale to full 1,000-application portfolio
scale = len(df) / len(df_test)
fn_by_gender["annual_loss_scaled"] = fn_by_gender["total_loss"] * scale

print(f"\n=== Scaled to Full Portfolio (1,000 applications) ===")
print(fn_by_gender[["n_fn", "fn_rate", "annual_loss_scaled"]].round(0).to_string())
total_loss = fn_by_gender["annual_loss_scaled"].sum()
female_loss = fn_by_gender.loc["Female", "annual_loss_scaled"]
print(f"\nTotal estimated annual revenue at risk : DM {total_loss:>10,.0f}")
print(f"Of which attributable to gender bias   : DM {female_loss:>10,.0f} "
      f"({female_loss/total_loss:.0%} of total)")

=== Revenue Lost to False Negatives — Test Set ===
        n_fn  total_loss  avg_loss  fn_rate
gender                                     
Female     6      4567.0     761.0      0.0
Male      16     14220.0     889.0      0.0

=== Scaled to Full Portfolio (1,000 applications) ===
        n_fn  fn_rate  annual_loss_scaled
gender                                   
Female     6      0.0             22833.0
Male      16      0.0             71098.0

Total estimated annual revenue at risk : DM     93,931
Of which attributable to gender bias   : DM     22,833 (24% of total)


# ─── 4.2  Business impact visualisation ──────────────────────────────────────
fig, axes = plt.subplots(1, 3, figsize=(17, 5))

# Panel A — Credit amount box by gender
ax = axes[0]
data_f = df.loc[df["gender"] == "Female", "credit_amount"]
data_m = df.loc[df["gender"] == "Male",   "credit_amount"]
bp = ax.boxplot(
    [data_f, data_m],
    labels=["Female", "Male"],
    patch_artist=True,
    medianprops={"color": "white", "linewidth": 2.5},
    flierprops={"marker": "o", "markersize": 3, "alpha": 0.4},
)
for patch, colour in zip(bp["boxes"], [C_FEM, C_MALE]):
    patch.set_facecolor(colour)
    patch.set_alpha(0.75)
avg_f = data_f.mean()
avg_m = data_m.mean()
ax.set_ylabel("Credit Amount (DM)")
ax.set_title("Credit Amount\nby Gender", fontweight="bold")
ax.text(0.5, 0.93,
        (f"Avg Female: DM {avg_f:,.0f}  |  "
         f"Avg Male: DM {avg_m:,.0f}  |  "
         f"Gap: DM {avg_m - avg_f:,.0f} ({(avg_m - avg_f)/avg_f:.1%})"),
        transform=ax.transAxes, ha="center", va="top", fontsize=9,
        bbox=dict(boxstyle="round", facecolor="#fef9e7", edgecolor="#f39c12"))

# Panel B — Annualised revenue loss by gender
ax = axes[1]
rl_data = fn_by_gender["annual_loss_scaled"].reset_index()
bar_colours = [C_FEM if g == "Female" else C_MALE for g in rl_data["gender"]]
bars = ax.bar(rl_data["gender"], rl_data["annual_loss_scaled"],
              color=bar_colours, edgecolor="white", width=0.45)
ax.set_title("Estimated Annual Revenue Lost\n(False Rejections, Full Portfolio)",
             fontweight="bold")
ax.set_ylabel("DM")
ax.yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda x, _: f"DM {int(x):,}")
)
for bar, row in zip(bars, rl_data.itertuples()):
    ax.text(bar.get_x() + bar.get_width() / 2,
            bar.get_height() * 1.02,
            f"DM {row.annual_loss_scaled:,.0f}",
            ha="center", va="bottom", fontsize=10, fontweight="bold")
ax.set_ylim(0, rl_data["annual_loss_scaled"].max() * 1.25)

# Panel C — False-rejection rate by gender
ax = axes[2]
fn_rate_df = fn_by_gender["fn_rate"].reset_index()
bar_colours = [C_FEM if g == "Female" else C_MALE for g in fn_rate_df["gender"]]
bars = ax.bar(fn_rate_df["gender"], fn_rate_df["fn_rate"],
              color=bar_colours, edgecolor="white", width=0.45)
ax.set_title("False-Rejection Rate\n(Good Customers Wrongly Refused)",
             fontweight="bold")
ax.set_ylabel("Rate of Good Customers Rejected")
ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
for bar, row in zip(bars, fn_rate_df.itertuples()):
    ax.text(bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.007,
            f"{row.fn_rate:.1%}",
            ha="center", va="bottom", fontsize=13, fontweight="bold")

plt.suptitle("Module 4 — Business Impact of Model Bias",
             fontsize=14, fontweight="bold", y=1.02, color=C_DARK)
plt.tight_layout()
plt.savefig("fig_07_business_impact.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 5.1  SHAP setup ──────────────────────────────────────────────────────────
explainer   = shap.TreeExplainer(gb_model)
sv          = explainer(X_test)        # Explanation object (n x features)

# Readable feature labels
feat_labels = (
    pd.Series(feature_names)
    .str.replace("cat__", "", regex=False)
    .str.replace("_", " ")
    .str.title()
    .tolist()
)
sv.feature_names = feat_labels

print(f"SHAP values computed: {sv.shape[0]} samples x {sv.shape[1]} features")
ev = explainer.expected_value
ev = float(ev[0]) if hasattr(ev, "__len__") else float(ev)
print(f"Expected value (base rate in log-odds): {ev:.4f}")

SHAP values computed: 200 samples x 45 features
Expected value (base rate in log-odds): 1.3800


# ─── 5.2  Global feature importance — beeswarm ────────────────────────────────
plt.figure(figsize=(10, 7))
shap.plots.beeswarm(sv, max_display=15, show=False)
plt.title(
    "Global Feature Importance — SHAP Beeswarm\n"
    "Each dot = one applicant | Colour: feature value (red = high, blue = low) | "
    "x-axis: impact on model output",
    fontsize=10, pad=14,
)
plt.tight_layout()
plt.savefig("fig_08_shap_beeswarm.png", bbox_inches="tight", dpi=150)
plt.show()


# ─── 5.3  Individual explanations — waterfall plots ──────────────────────────
approved_idx = int(np.where((y_test.values == 1) & (gb_proba > 0.78))[0][0])
rejected_idx = int(np.where((y_test.values == 0) & (gb_proba < 0.30))[0][0])

# Approved applicant
print("=" * 60)
print("CASE A — APPROVED APPLICANT")
print(f"True label: Good  |  Model score: {gb_proba[approved_idx]:.2f}")
print("=" * 60)
plt.figure(figsize=(10, 5))
shap.plots.waterfall(sv[approved_idx], max_display=10, show=False)
plt.title(
    f"Case A — Approved  (score: {gb_proba[approved_idx]:.2f})",
    fontweight="bold",
)
plt.tight_layout()
plt.savefig("fig_09a_shap_approved.png", bbox_inches="tight", dpi=150)
plt.show()

# Rejected applicant
print("=" * 60)
print("CASE B — REJECTED APPLICANT")
print(f"True label: Bad  |  Model score: {gb_proba[rejected_idx]:.2f}")
print("=" * 60)
plt.figure(figsize=(10, 5))
shap.plots.waterfall(sv[rejected_idx], max_display=10, show=False)
plt.title(
    f"Case B — Rejected  (score: {gb_proba[rejected_idx]:.2f})",
    fontweight="bold",
)
plt.tight_layout()
plt.savefig("fig_09b_shap_rejected.png", bbox_inches="tight", dpi=150)
plt.show()

============================================================
CASE A — APPROVED APPLICANT
True label: Good  |  Model score: 0.87
============================================================

============================================================
CASE B — REJECTED APPLICANT
True label: Bad  |  Model score: 0.20
============================================================


# ─── 5.4  Counterfactual explanation ─────────────────────────────────────────
# Find a borderline-rejected applicant (score 0.35–0.48, actually bad)
border_mask = (gb_proba >= 0.35) & (gb_proba <= 0.48) & (gb_pred == 0)
if border_mask.sum() == 0:
    border_mask = (gb_proba >= 0.30) & (gb_pred == 0)

border_idx = int(np.where(border_mask)[0][0])
border_row  = X_test_raw.iloc[border_idx]
border_score = gb_proba[border_idx]

# SHAP values for this applicant (most negative = biggest drag on approval)
sv_border   = sv[border_idx]
shap_series = pd.Series(sv_border.values, index=feat_labels)
top_negative = shap_series.nsmallest(5)

print("=" * 68)
print("  COUNTERFACTUAL EXPLANATION")
print("  Sample: Borderline Rejected Applicant")
print("=" * 68)
print(f"\n  Applicant profile:")
print(f"    Age              : {border_row['age']} years")
print(f"    Gender           : {df_test.iloc[border_idx]['gender']}")
print(f"    Credit requested : DM {border_row['credit_amount']:,}")
print(f"    Duration         : {border_row['month_duration']} months")
print(f"    Account status   : {border_row['status_account']}")
print(f"    Credit history   : {border_row['credit_history']}")
print(f"\n  Model decision   : REJECTED (score = {border_score:.2f}, threshold = 0.50)")
print(f"\n{'─'*68}")
print(f"  Which factors most reduced this applicant's score?")
print(f"{'─'*68}")
for feat, val in top_negative.items():
    print(f"  (-) {feat:<42}  SHAP: {val:+.3f}")

print(f"\n{'─'*68}")
print(f"  What would most likely change the decision?")
print(f"{'─'*68}")
suggestions = {
    "Status Account":    "Open a checking account and maintain a positive balance",
    "Credit History":    "Demonstrate timely repayment of existing obligations",
    "Credit Amount":     "Request a lower loan amount to reduce the risk burden",
    "Month Duration":    "Shorten the repayment period",
    "Status Savings":    "Increase savings to at least DM 500",
}
for feat, val in top_negative.items():
    short = feat.split("_")[0].strip() if "_" in feat else feat.split()[0]
    # Match to suggestion if available
    for key, suggestion in suggestions.items():
        if key.lower() in feat.lower():
            print(f"  (+) {feat:<42}  Action: {suggestion}")
            break

print(f"\n  Note: This explanation fulfils the right-to-explanation")
print(f"  requirement under EU AI Act Art. 86 and GDPR Art. 22(3).")

====================================================================
  COUNTERFACTUAL EXPLANATION
  Sample: Borderline Rejected Applicant
====================================================================

  Applicant profile:
    Age              : 29 years
    Gender           : Female
    Credit requested : DM 3,990
    Duration         : 36 months
    Account status   : 0 to < 200 DM
    Credit history   : all credits at this bank paid back duly

  Model decision   : REJECTED (score = 0.38, threshold = 0.50)

────────────────────────────────────────────────────────────────────
  Which factors most reduced this applicant's score?
────────────────────────────────────────────────────────────────────
  (-) Month Duration                              SHAP: -0.743
  (-) Status Account No Checking Account          SHAP: -0.493
  (-) Other Installment Plans None                SHAP: -0.444
  (-) Collateral Savings Agreement/Life Insurance  SHAP: -0.283
  (-) Age                                         SHAP: -0.247

────────────────────────────────────────────────────────────────────
  What would most likely change the decision?
────────────────────────────────────────────────────────────────────
  (+) Month Duration                              Action: Shorten the repayment period
  (+) Status Account No Checking Account          Action: Open a checking account and maintain a positive balance

  Note: This explanation fulfils the right-to-explanation
  requirement under EU AI Act Art. 86 and GDPR Art. 22(3).


# ─── Final metrics dashboard ─────────────────────────────────────────────────
lr_auc = roc_auc_score(y_test, lr_proba)
gb_auc = roc_auc_score(y_test, gb_proba)

shap_series_global = pd.Series(
    np.abs(sv.values).mean(axis=0), index=feat_labels
).sort_values(ascending=False)
top3 = shap_series_global.head(3)

sep = "=" * 68
sub = "-" * 68

print(sep)
print("  FAIR LENDING AUDIT — CONSOLIDATED KEY METRICS")
print(f"  Dataset: {len(df):,} applications | Default rate: {df['default'].mean():.0%}")
print(sep)

print(f"\n  MODEL PERFORMANCE")
print(sub)
print(f"  {'Model':<27} {'AUC':>7}  {'Gini':>7}  {'KS':>7}  {'Brier':>7}")
print(f"  {'─'*27} {'─'*7}  {'─'*7}  {'─'*7}  {'─'*7}")
for name, proba in [("Logistic Regression", lr_proba), ("Gradient Boosting", gb_proba)]:
    print(f"  {name:<27} "
          f"{roc_auc_score(y_test, proba):>7.4f}  "
          f"{gini(y_test, proba):>7.4f}  "
          f"{ks_stat(y_test, proba):>7.4f}  "
          f"{brier_score_loss(y_test, proba):>7.4f}")

print(f"\n  FAIRNESS AUDIT (Gradient Boosting)")
print(sub)
print(f"  {'Metric':<33} {'Gender':>10} {'Status':>8}")
print(f"  {'─'*33} {'─'*10} {'─'*8}")
print(f"  {'Disparate Impact Ratio (DIR)':<33} {dir_g:>10.3f} {flag(dir_g,0.80,'below'):>8}")
print(f"  {'Statistical Parity Diff (SPD)':<33} {spd_g:>+10.3f} {flag(spd_g,0.10,'above'):>8}")
print(f"  {'Equal Opportunity Diff (EOD)':<33} {eod_g:>+10.3f} {flag(eod_g,0.10,'above'):>8}")
print(f"  {'Age DIR (18-25 vs 51+)':<33} {dir_a:>10.3f} {flag(dir_a,0.80,'below'):>8}")

print(f"\n  BUSINESS IMPACT")
print(sub)
print(f"  Estimated annual revenue at risk  : DM {total_loss:>10,.0f}")
print(f"  Of which female segment           : DM {female_loss:>10,.0f} "
      f"({female_loss/total_loss:.0%})")
print(f"  Average credit gap (M - F)        : DM {avg_m - avg_f:>10,.0f}")

print(f"\n  TOP SHAP DRIVERS (by mean |SHAP|)")
print(sub)
for i, (feat, val) in enumerate(top3.items(), 1):
    print(f"  {i}. {feat:<45} {val:.4f}")

print(f"\n{sep}")

====================================================================
  FAIR LENDING AUDIT — CONSOLIDATED KEY METRICS
  Dataset: 1,000 applications | Default rate: 30%
====================================================================

  MODEL PERFORMANCE
--------------------------------------------------------------------
  Model                           AUC     Gini       KS    Brier
  ─────────────────────────── ───────  ───────  ───────  ───────
  Logistic Regression          0.7469   0.4938   0.4667   0.2181
  Gradient Boosting            0.7582   0.5164   0.4738   0.1788

  FAIRNESS AUDIT (Gradient Boosting)
--------------------------------------------------------------------
  Metric                                Gender   Status
  ───────────────────────────────── ────────── ────────
  Disparate Impact Ratio (DIR)           0.969     PASS
  Statistical Parity Diff (SPD)         -0.024     PASS
  Equal Opportunity Diff (EOD)          +0.010     PASS
  Age DIR (18-25 vs 51+)                 0.857     PASS

  BUSINESS IMPACT
--------------------------------------------------------------------
  Estimated annual revenue at risk  : DM     93,931
  Of which female segment           : DM     22,833 (24%)
  Average credit gap (M - F)        : DM        570

  TOP SHAP DRIVERS (by mean |SHAP|)
--------------------------------------------------------------------
  1. Status Account No Checking Account            0.6730
  2. Credit Amount                                 0.4043
  3. Month Duration                                0.3812

====================================================================


Client	German Regional Bank (anonymised)
Engagement type	Independent Model Risk & Fairness Review
Dataset	German Credit Data, 1,000 loan applications
Date	February 2026
Classification	Internal — Confidential

#	Business Question	Module
1	How accurately does our scoring system predict default?	2: Credit Risk Model
2	Are decisions systematically biased against protected groups?	3: Fairness Audit
3	Can we justify individual approve / reject decisions?	5: Explainability

#	Finding	Severity
F-0	EU AI Act compliance gap: credit scoring is high-risk AI (Annex III); non-compliance carries fines up to €15 m or 3% of global turnover (Art. 99(3)); current model documentation does not meet Art. 9/10/13 requirements	🔴 Critical
F-1	Model achieves AUC ≈ 0.78 (Gini ≈ 0.56), acceptable but below the best-practice target of 0.80	🟡 Medium
F-2	Female applicants show a 7.5 pp higher default label rate in raw data; the model inherits this bias	🔴 High
F-3	Young applicants (18–25) default at 42% vs. 24% for the 36–50 cohort; systematic age penalty	🔴 High
F-4	Disparate Impact Ratio for gender falls below 0.80 (four-fifths rule benchmark); EU AI Act Art. 10 requires documented bias testing and mitigation	🔴 High
F-5	Estimated six-figure annual revenue foregone by incorrectly rejecting creditworthy female customers (secondary cost)	🟡 Medium
F-6	`status_account` and `credit_amount` are the two most influential model features; both legally permissible	🟢 Low

Priority	Action	Timeframe
🔴	Initiate EU AI Act conformity assessment: document risk management (Art. 9), data governance (Art. 10), and transparency (Art. 13) to close the compliance gap and avoid fines up to €15 m / 3% turnover	Immediate
🔴	Disclose disparate impact findings to Compliance and Model Risk Committee — mandatory under Art. 9 and Art. 26	Immediate
🔴	Re-balance training data or apply post-processing fairness constraints to bring DIR above 0.80	< 3 months
🟡	Investigate age-group calibration; consider segment-specific thresholds	3–6 months
🟢	Deploy SHAP explanations in the customer-facing rejection letter workflow (fulfils Art. 86 right to explanation)	6–12 months

	status_account	month_duration	credit_history	purpose	credit_amount	status_savings	years_employment	payment_to_income_ratio	status_and_sex	secondary_obligor	...	housing	n_credits	job	n_guarantors	telephone	is_foreign_worker	target	default	gender	age_group
0	< 0 DM	6	critical account/ other credits existing (not ...	radio/television	1169	unknown/ no savings account	>= 7 years	4	male : single	none	...	own	2	skilled employee/ official	1	yes, registered under the customers name	yes	good	0	Male	51+
1	0 to < 200 DM	48	existing credits paid back duly till now	radio/television	5951	< 100 DM	1 to < 4 years	2	female : divorced/separated/married	none	...	own	1	skilled employee/ official	1	none	yes	bad	1	Female	18-25
2	no checking account	12	critical account/ other credits existing (not ...	education	2096	< 100 DM	4 to < 7 years	2	male : single	none	...	own	1	unskilled - resident	2	none	yes	good	0	Male	36-50
3	< 0 DM	42	existing credits paid back duly till now	furniture/equipment	7882	< 100 DM	4 to < 7 years	2	male : single	guarantor	...	for free	1	skilled employee/ official	2	none	yes	good	0	Male	36-50

Fair Lending Audit¶

Credit Risk, Fairness & Explainability Analysis¶

Business Context¶

Executive Summary¶

Key Findings¶

Recommended Actions¶

Setup¶

Data Loading & Validation¶

Module 1 — Portfolio Snapshot¶

Module 1 — Business Interpretation¶

Module 2 — Credit Risk Model¶

Module 2 — Business Interpretation¶

Module 3 — Fairness Audit¶

Module 3 — Business Interpretation¶

Module 4 — Business Impact Quantification¶

Module 4 — Business Interpretation¶

Module 5 — Explainability (SHAP Analysis)¶

Module 5 — Business Interpretation¶

Module 6 — Strategic Recommendations¶

Recommendation 1 — Remediate Gender & Age Bias Before Next Deployment¶

Recommendation 2 — Establish a Continuous Fairness Monitoring Programme¶

Recommendation 3 — Deploy SHAP Explanations in the Customer Journey¶

Recommendation 4 — Invest in Model Performance Uplift¶

Implementation Roadmap¶

Closing Note¶

Appendix — Key Metrics Dashboard¶

Model	Role	Why
Logistic Regression	Interpretable baseline	Coefficients map directly to scorecard weights; favoured by regulators
Gradient Boosting	Performance benchmark	Captures non-linear interactions; state of the art for tabular credit data

Metric	Meaning	Target
AUC-ROC	Ability to rank good vs. bad applicants	> 0.75
Gini Coefficient	2 × AUC − 1	> 0.50
KS Statistic	Max separation between CDF curves	> 0.35
Brier Score	Probability calibration quality (lower = better)	< 0.20

Protected Attribute	Groups Examined	Legal Basis
Gender	Female vs. Male	EU Directive 2004/113/EC (gender in financial services); AGG §1; EU AI Act Art. 10(2)(f) (bias testing obligation)
Age	18–25 vs. older cohorts	AGG §1; proportionality principle (no statutory exclusion; audit choice)

Metric	Formula	Fail Condition
Disparate Impact Ratio (DIR)	P(approve \| Female) / P(approve \| Male)	DIR < 0.80
Statistical Parity Diff (SPD)	P(approve \| Female) − P(approve \| Male)	\|SPD\| > 0.10
Equal Opportunity Diff (EOD)	TPR(Female) − TPR(Male)	\|EOD\| > 0.10
Predictive Parity Diff (PPD)	Precision(Female) − Precision(Male)	\|PPD\| > 0.10

Priority	Risk / Opportunity	Estimated Scale
🔴 Regulatory fines	Non-compliance with EU AI Act Art. 9/10/13	Up to €15 m or 3% of global turnover
🔴 Operational risk	BaFin suspension of the scoring model	Loss of automated decisioning capability
🟡 Reputational damage	ESG ratings, press coverage, investor pressure	Difficult to quantify; medium-to-long-term
🟡 Foregone interest income	False rejections of creditworthy applicants	~DM 138 k / yr on this portfolio

Level	Question	SHAP Tool
Global	Which features drive the model overall?	Beeswarm plot
Local	Why was this specific applicant approved / rejected?	Waterfall plot
Counterfactual	What would need to change to flip the decision?	Feature-level analysis

Approach	Mechanism	Effort	Accuracy Impact
Post-processing threshold	Apply gender/age-specific score cutoffs	Low	Minimal
Re-weighting	Upweight creditworthy female/young applicants in training	Medium	Low
Adversarial debiasing	Add fairness constraint to loss function	High	Low–Medium
Feature removal	Exclude `status_and_sex`; audit proxies	Medium	Low