# Copyright 2026 Srikumar Krishnamoorthy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
HUG-IML first-class visualizations using Plotly.
Public API
----------
from hugiml.plots import HUGPlotter
plotter = HUGPlotter(clf)
fig = plotter.plot_marginal_bin_profile("age", X) # EBM shape-function equivalent
fig = plotter.plot_feature_combinations("age") # compound patterns for one feature
fig = plotter.plot_feature_importance(top_n=15)
fig = plotter.plot_utility_vs_ig() # scatter: utility × IG × support
fig = plotter.plot_top_patterns(top_n=20)
fig = plotter.plot_feature_coverage()
fig = plotter.plot_pattern_lengths()
fig = plotter.plot_support_distribution()
fig = plotter.plot_active_patterns(X, sample_idx=0) # local explanation
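fig = plotter.plot_2d_profile("age", "income") # 2-D heatmap of compound patterns
html = plotter.plot_dashboard(X) # full multi-panel HTML (returned as a string)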
"""
from __future__ import annotations
from typing import Any
import numpy as np
import pandas as pd
try:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
_PLOTLY = True
except ImportError:
_PLOTLY = False
__all__ = [
"HUGPlotter",
]
# ---------------------------------------------------------------------------
# Design tokens (matching the governance dashboard)
# ---------------------------------------------------------------------------
# Shared colour and font tokens used by every figure in this module.
# "a1".."a5" are accent colours; the trailing comments note their main use.
_T = {
"bg": "#0d1117",
"panel": "#161b22",
"border": "#30363d",
"grid": "#21262d",
"text": "#e6edf3",
"muted": "#8b949e",
"a1": "#58a6ff", # blue   - utility / main bars, titles
"a2": "#3fb950", # green  - combinations +1
"a3": "#f78166", # red    - combinations +3 / negative coefficients
"a4": "#d2a8ff", # purple - support distribution histogram
"a5": "#ffa657", # orange - support line overlay
"font_b": "JetBrains Mono, monospace",
"font_t": "Syne, sans-serif",
}
# Base layout splatted into ``fig.update_layout(**_LAYOUT_BASE, ...)`` by every chart.
# ``margin`` and ``legend`` are deliberately left out: a chart that also passed
# ``margin=...`` next to ``**_LAYOUT_BASE`` would otherwise supply the same keyword
# twice. Charts that need them pass their own values.
_LAYOUT_BASE = dict(
paper_bgcolor=_T["bg"],
plot_bgcolor=_T["panel"],
font=dict(color=_T["text"], family=_T["font_b"], size=11),
xaxis=dict(tickfont=dict(color=_T["muted"]), gridcolor=_T["grid"], linecolor=_T["border"]),
yaxis=dict(tickfont=dict(color=_T["muted"]), gridcolor=_T["grid"], linecolor=_T["border"]),
)
# Default margin and legend styling kept apart from _LAYOUT_BASE for the reason above.
# NOTE(review): neither constant is referenced by the code visible in this file;
# the charts inline their own margin/legend dicts - confirm whether these are still needed.
_LAYOUT_MARGIN = dict(l=44, r=20, t=50, b=40)
_LAYOUT_LEGEND = dict(
font=dict(color=_T["muted"]), bgcolor="rgba(0,0,0,0)", bordercolor=_T["border"]
)
def _title(text: str) -> dict:
    """Return a Plotly ``title`` dict for *text* styled with the module's title font."""
    title_font = {"color": _T["a1"], "size": 13, "family": _T["font_t"]}
    return {"text": text, "font": title_font}
def _require_plotly() -> None:
    """Raise ``ImportError`` if the optional plotly import at module load failed."""
    if _PLOTLY:
        return
    raise ImportError("plotly is required for HUGPlotter. Install with: pip install plotly")
# ---------------------------------------------------------------------------
# HUGPlotter
# ---------------------------------------------------------------------------
def _parse_bin_lower(bin_label: str) -> float | None:
    """Extract the numeric lower bound from an interval-style bin label.

    Labels such as ``'[7.69,11.41]'`` or ``'[11.8,17.5)'`` yield the number
    that follows the opening ``'['``.  Labels that do not start with ``'['``
    (e.g. categorical values), or whose first field is not a valid float,
    yield ``None``.
    """
    if not bin_label.startswith("["):
        return None
    # Everything between the opening bracket and the first comma is the lower bound.
    lower_text, _, _ = bin_label[1:].partition(",")
    try:
        return float(lower_text)
    except (ValueError, IndexError):
        return None
def _get_bin_edges(clf, feature_name: str):
    """Return real-value bin edges for *feature_name*, or ``None`` if unavailable.

    Two sources are tried in order:

    1. ``clf._bin_edges_`` - a mapping ``feature name -> edges`` (HUGIMLAdaptive
       path); the stored edges are returned as a NumPy array.
    2. ``clf.td_`` - normalised edges in ``_cpp_all_edges`` are rescaled with
       ``_cpp_col_min`` / ``_cpp_col_range`` for the column whose position in
       ``clf.feature_names_in_`` matches *feature_name* (native classifier path).

    Parameters
    ----------
    clf : object
        Fitted classifier; only the attributes named above are read.
    feature_name : str
        Name of the feature whose edges are requested.

    Returns
    -------
    numpy.ndarray or None
        ``None`` whenever a required attribute is missing or ``None``, the
        feature is unknown, any per-column array is too short for the feature's
        index, or fewer than two edges are stored.
    """
    # HUGIMLAdaptive path - edges stored directly.  ``getattr`` with a None
    # default also covers classifiers that initialise ``_bin_edges_ = None``.
    direct_edges = getattr(clf, "_bin_edges_", None)
    if direct_edges is not None and feature_name in direct_edges:
        return np.array(direct_edges[feature_name])

    # Native path - reconstruct from normalised C++ edges.
    feature_names = getattr(clf, "feature_names_in_", None)
    td = getattr(clf, "td_", None)
    if feature_names is None or td is None:
        return None
    try:
        j = list(feature_names).index(feature_name)
    except ValueError:
        return None

    all_edges = getattr(td, "_cpp_all_edges", None)
    col_min_arr = getattr(td, "_cpp_col_min", None)
    col_range_arr = getattr(td, "_cpp_col_range", None)
    if all_edges is None or col_min_arr is None or col_range_arr is None:
        return None
    # Every per-column array must cover index j; otherwise report "unavailable"
    # instead of letting an IndexError escape into the plotting code.
    if j >= len(all_edges) or j >= len(col_min_arr) or j >= len(col_range_arr):
        return None

    edges_norm = np.array(all_edges[j])
    if len(edges_norm) < 2:
        return None
    # Undo the normalisation: real = normalised * range + min.
    return edges_norm * float(col_range_arr[j]) + float(col_min_arr[j])
class HUGPlotter:
    """Plotly-based plotting facade over a fitted HUGIMLClassifierNative.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
        The classifier to visualise; it must already expose ``patterns_``.
    height_default : int
        Fallback figure height for plots that do not derive their own.
    """

    def __init__(self, clf: Any, height_default: int = 380) -> None:
        """Store the classifier and default height after checking prerequisites.

        Raises ``ImportError`` when plotly is unavailable and ``RuntimeError``
        when *clf* has not been fitted.
        """
        _require_plotly()
        self._clf, self._h = clf, height_default
        self._validate_fitted()
# ------------------------------------------------------------------
# 1. Marginal Bin Profile (= EBM per-feature shape function)
# ------------------------------------------------------------------
def plot_marginal_bin_profile(
    self,
    feature_name: str,
    X: Any | None = None,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Plot the 1-D HUG profile of one feature (EBM shape-function equivalent).

    Every singleton pattern of *feature_name* becomes a bar (x = bin label,
    y = utility, bar opacity scaled by information gain).  An orange dotted
    line overlays the training support percentage on the right y-axis.  When
    bin edges are available for the feature, bins without a mined pattern
    are drawn as grey zero-height bars instead of being dropped.

    Parameters
    ----------
    feature_name : str
        Feature whose singleton patterns (labels of the form
        ``"<feature_name>=<bin>"``) are plotted.
    X : ignored
        Accepted for API compatibility only; support is computed from the
        training matrix ``clf.x_train_hup_``.
    height : int, optional
        Figure height; defaults to the plotter's ``height_default``.
    title : str, optional
        Overrides the default ``"Marginal Bin Profile · <feature>"`` title.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    clf = self._clf
    n_train = clf.x_train_hup_.shape[0]
    prefix = feature_name + "="
    fig_title = _title(title or f"Marginal Bin Profile · {feature_name}")

    # -- Step 1: collect all singleton patterns for this feature ------------
    # Entries are (lower_bound_or_None, bin_label, utility, ig, support_pct).
    # A list (not a dict keyed by lower bound) is used so that categorical
    # bins, which have no numeric lower bound, do not overwrite each other.
    mined: list[tuple[float | None, str, float, float, float]] = []
    for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
        if len(pe.items) != 1 or ", " in label or not label.startswith(prefix):
            continue
        bin_label = label[len(prefix):]
        support_pct = float(clf.x_train_hup_[:, idx].sum()) / n_train * 100
        mined.append(
            (
                _parse_bin_lower(bin_label),
                bin_label,
                float(pe.utility),
                float(pe.ig),
                support_pct,
            )
        )

    # -- Step 2: build the dense ordered bin sequence ------------------------
    bins_x: list[str] = []
    bins_util: list[float] = []
    bins_ig: list[float] = []
    bins_sup: list[float] = []
    bins_mined: list[bool] = []

    real_edges = _get_bin_edges(clf, feature_name)
    if real_edges is not None and len(real_edges) >= 2:
        lower_edges = np.asarray(real_edges[:-1], dtype=float)
        upper_edges = np.asarray(real_edges[1:], dtype=float)
        widths = np.abs(upper_edges - lower_edges)

        # Assign each numeric mined bin to the edge bin whose lower bound is
        # nearest, so one mined bin can never fill several edge bins.  The 2%
        # tolerance is relative to the larger of |lower bound| and bin width,
        # which keeps it meaningful when the lower bound is 0.
        nearest: dict[int, tuple[float, tuple]] = {}
        for entry in mined:
            lo = entry[0]
            if lo is None:
                # Non-numeric labels cannot be placed on numeric edges.
                continue
            gaps = np.abs(lower_edges - lo)
            i = int(np.argmin(gaps))
            gap = float(gaps[i])
            tol = 0.02 * max(abs(float(lower_edges[i])), float(widths[i])) + 1e-9
            if gap <= tol and (i not in nearest or gap < nearest[i][0]):
                nearest[i] = (gap, entry)

        for i in range(len(lower_edges)):
            hit = nearest.get(i)
            if hit is not None:
                _, bin_label, util, ig, sup = hit[1]
                bins_x.append(bin_label)
                bins_util.append(util)
                bins_ig.append(ig)
                bins_sup.append(sup)
                bins_mined.append(True)
            else:
                # Construct a readable label for the un-mined bin.
                lo_val = float(lower_edges[i])
                hi_val = float(upper_edges[i])
                bins_x.append(f"[{lo_val:.4g},{hi_val:.4g}]")
                bins_util.append(0.0)
                bins_ig.append(0.0)
                bins_sup.append(0.0)
                bins_mined.append(False)
    else:
        # Fallback: only mined bins are known.
        if not mined:
            fig = go.Figure()
            fig.update_layout(
                **_LAYOUT_BASE,
                title=fig_title,
                height=height or self._h,
            )
            fig.add_annotation(
                text=f"No singleton patterns for '{feature_name}'. "
                "Try reducing G or increasing topK.",
                xref="paper",
                yref="paper",
                x=0.5,
                y=0.5,
                showarrow=False,
                font=dict(color=_T["muted"]),
            )
            return fig
        # Numeric bins left-to-right by lower bound; non-numeric (categorical)
        # bins follow in their original order (the sort is stable).
        ordered = sorted(
            mined,
            key=lambda e: (e[0] is None, 0.0 if e[0] is None else e[0]),
        )
        bins_x = [e[1] for e in ordered]
        bins_util = [e[2] for e in ordered]
        bins_ig = [e[3] for e in ordered]
        bins_sup = [e[4] for e in ordered]
        bins_mined = [True] * len(ordered)

    n_mined = sum(bins_mined)
    n_total = len(bins_x)
    subtitle = (
        f"{n_mined}/{n_total} bins have patterns"
        if n_mined < n_total
        else f"All {n_total} bins have patterns"
    )

    # -- Step 3: colours - IG intensity for mined bins, grey for empty ------
    max_ig = max(bins_ig) if any(bins_ig) else 1
    colors = []
    for ig, is_mined in zip(bins_ig, bins_mined):
        if is_mined:
            alpha = 0.4 + 0.6 * ig / (max_ig + 1e-9)
            colors.append(f"rgba(88,166,255,{alpha:.2f})")
        else:
            colors.append("rgba(100,100,100,0.25)")  # grey - no pattern mined

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Bar(
            x=bins_x,
            y=bins_util,
            name="Utility",
            marker=dict(color=colors, line=dict(color=_T["border"], width=0.5)),
            hovertemplate="<b>%{x}</b><br>Utility: %{y:.4f}<extra></extra>",
        ),
        secondary_y=False,
    )
    fig.add_trace(
        go.Scatter(
            x=bins_x,
            y=bins_sup,
            name="Support %",
            mode="lines+markers",
            line=dict(color=_T["a5"], dash="dot", width=2.5),
            marker=dict(color=_T["a5"], size=7),
            hovertemplate="Support: %{y:.1f}%<extra></extra>",
        ),
        secondary_y=True,
    )
    fig.update_layout(
        **_LAYOUT_BASE,
        title=fig_title,
        height=height or self._h,
        annotations=[
            dict(
                text=subtitle,
                xref="paper",
                yref="paper",
                x=0.99,
                y=0.99,
                showarrow=False,
                xanchor="right",
                font=dict(size=10, color=_T["muted"]),
            )
        ],
    )
    # The legend is set in its own call because it is not part of _LAYOUT_BASE.
    fig.update_layout(
        legend=dict(
            x=0.01,
            y=0.99,
            font=dict(color=_T["muted"]),
            bgcolor="rgba(0,0,0,0)",
            bordercolor=_T["border"],
        )
    )
    fig.update_xaxes(
        title_text="Bin Range",
        tickangle=-35,
        tickfont=dict(size=9, color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text="Utility",
        secondary_y=False,
        title_font=dict(color=_T["a1"]),
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text="Support %",
        secondary_y=True,
        title_font=dict(color=_T["a5"]),
        showgrid=False,
        tickfont=dict(color=_T["a5"]),
    )
    return fig
# ------------------------------------------------------------------
# 2. Feature Combinations
# ------------------------------------------------------------------
def plot_feature_combinations(
    self,
    feature_name: str,
    top_n: int = 25,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Plot the compound patterns that involve *feature_name*.

    One horizontal bar per compound pattern (two or more items), sized by
    utility and coloured by how many extra features the pattern has
    (+1 green, +2 orange, +3 red, anything else purple).  Each bar is
    annotated with its training support percentage.

    Parameters
    ----------
    feature_name : str
        Feature that every plotted pattern must contain.
    top_n : int
        Keep only the *top_n* patterns by utility.
    height : int, optional
        Figure height; by default it grows with the number of bars.
    title : str, optional
        Overrides the default ``"Feature Combinations · <feature>"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    n_train = clf.x_train_hup_.shape[0]
    wanted_prefix = feature_name + "="
    chart_title = _title(title or f"Feature Combinations · {feature_name}")

    # Compound labels look like "feat1=[lo,hi], feat2=val".  Each part is
    # tested with startswith so that "age" does not match "mortgage_age=...".
    rows = []
    for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
        n_items = len(pe.items)
        if n_items < 2:
            continue
        if not any(part.strip().startswith(wanted_prefix) for part in label.split(", ")):
            continue
        rows.append(
            {
                "label": label,
                "util": float(pe.utility),
                "extra": n_items - 1,
                "sup": float(clf.x_train_hup_[:, idx].sum()) / n_train * 100,
            }
        )

    fig = go.Figure()
    if not rows:
        fig.update_layout(**_LAYOUT_BASE, title=chart_title, height=height or self._h)
        fig.add_annotation(
            text=f"No compound patterns for '{feature_name}'",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    # NOTE(review): the rows are reversed here AND the y-axis below uses
    # autorange="reversed" - confirm the resulting bar order is the intended one.
    df = pd.DataFrame(rows).sort_values("util", ascending=False).head(top_n)[::-1]

    # Colour by extra-feature count; counts outside 1..3 fall back to purple.
    palette = {1: _T["a2"], 2: _T["a5"], 3: _T["a3"]}
    bar_colors = [palette.get(extra, _T["a4"]) for extra in df["extra"]]
    y_labels = [f"{lbl[:45]}…" if len(lbl) > 45 else lbl for lbl in df["label"]]
    bar_text = [f"supp={sup:.1f}%" for sup in df["sup"]]

    fig.add_trace(
        go.Bar(
            x=df["util"],
            y=y_labels,
            orientation="h",
            marker=dict(color=bar_colors, line=dict(color=_T["border"], width=0.3)),
            text=bar_text,
            textfont=dict(color=_T["muted"], size=8),
            textposition="outside",
            hovertemplate="<b>%{y}</b><br>Utility: %{x:.4f}<extra></extra>",
        )
    )
    # Hand-made colour legend placed above the plot area.
    legend_text = (
        f"<span style='color:{_T['a2']}'>■</span> +1 "
        f"<span style='color:{_T['a5']}'>■</span> +2 "
        f"<span style='color:{_T['a3']}'>■</span> +3 extra features"
    )
    fig.add_annotation(
        text=legend_text,
        x=1.01,
        xanchor="right",
        xref="paper",
        y=1.06,
        yref="paper",
        showarrow=False,
        font=dict(color=_T["muted"], size=9),
    )
    auto_height = max(320, len(df) * 22 + 80)
    fig.update_layout(
        **_LAYOUT_BASE,
        title=chart_title,
        height=auto_height if height is None else height,
    )
    fig.update_xaxes(
        title_text="Utility",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        tickfont=dict(size=9, color=_T["muted"]),
        autorange="reversed",
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 3. Feature Importance
# ------------------------------------------------------------------
def plot_feature_importance(
    self,
    top_n: int = 15,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Plot mean pattern utility per feature, coloured by mean information gain.

    A pattern contributes its utility and IG to every feature named in its
    label (the text before ``=`` in each ``", "``-separated part).  Bars are
    annotated with the number of contributing patterns.  When the classifier
    has no patterns an annotated empty figure is returned.

    Parameters
    ----------
    top_n : int
        Keep only the *top_n* features by mean utility.
    height : int, optional
        Figure height; by default it grows with the number of bars.
    title : str, optional
        Overrides the default ``"Feature Importance"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    chart_title = _title(title or "Feature Importance")

    feat_stats: dict[str, dict[str, list[float]]] = {}
    for label, pe in zip(clf.get_hug_features(), clf.patterns_):
        utility, ig = float(pe.utility), float(pe.ig)
        for part in label.split(", "):
            if "=" not in part:
                continue
            fname = part.split("=")[0]
            entry = feat_stats.setdefault(fname, {"utils": [], "igs": []})
            entry["utils"].append(utility)
            entry["igs"].append(ig)

    if not feat_stats:
        # An empty DataFrame has no "mean_util" column to sort on, so handle
        # the no-pattern case explicitly, like the other plot methods do.
        fig = go.Figure()
        fig.update_layout(**_LAYOUT_BASE, title=chart_title, height=height or self._h)
        fig.add_annotation(
            text="No patterns available.",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    rows = [
        {
            "feature": fname,
            "mean_util": float(np.mean(stats["utils"])),
            "mean_ig": float(np.mean(stats["igs"])),
            "n": len(stats["utils"]),
        }
        for fname, stats in feat_stats.items()
    ]
    # NOTE(review): the rows are reversed here AND the y-axis below uses
    # autorange="reversed" - confirm the resulting bar order is the intended one.
    df = pd.DataFrame(rows).sort_values("mean_util", ascending=False).head(top_n)[::-1]

    fig = go.Figure(
        go.Bar(
            x=df["mean_util"],
            y=df["feature"],
            orientation="h",
            marker=dict(
                color=df["mean_ig"],
                colorscale=[[0, "#30123b"], [0.5, "#1bcfd4"], [1, "#7a0402"]],
                showscale=True,
                colorbar=dict(
                    thickness=11,
                    tickfont=dict(color=_T["muted"]),
                    title=dict(font=dict(color=_T["muted"]), text="Mean IG"),
                ),
                line=dict(color=_T["border"], width=0.3),
            ),
            text=[f"n={n}" for n in df["n"]],
            textfont=dict(color=_T["muted"], size=9),
            textposition="outside",
            hovertemplate="<b>%{y}</b><br>Mean U: %{x:.4f}<extra></extra>",
        )
    )
    fig.update_layout(
        **_LAYOUT_BASE,
        title=chart_title,
        height=height or max(280, len(df) * 28 + 80),
        margin=dict(l=44, r=20, t=76, b=40),
    )
    fig.update_xaxes(
        title_text="Mean Utility",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        autorange="reversed",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 4. Utility vs Information Gain scatter
# ------------------------------------------------------------------
def plot_utility_vs_ig(
    self,
    feature_filter: str | None = None,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Scatter of pattern utility (x) against information gain (y).

    Markers are coloured by training support (fraction of training rows on
    which the pattern is active).

    Parameters
    ----------
    feature_filter : str, optional
        If given, only patterns that contain this feature are plotted.  The
        feature name must match a whole label part (``"<name>=..."``), so
        ``"age"`` does not select ``"mortgage_age=..."``.
    height : int, optional
        Figure height; defaults to 430.
    title : str, optional
        Overrides the default ``"Utility vs Info Gain"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    n_train = clf.x_train_hup_.shape[0]
    prefix = f"{feature_filter}=" if feature_filter else None

    x_util, y_ig, z_sup, texts = [], [], [], []
    for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
        # Match per label part with startswith; a plain substring test would
        # also accept features whose name merely ends with the filter.
        if prefix is not None and not any(
            part.strip().startswith(prefix) for part in label.split(", ")
        ):
            continue
        x_util.append(float(pe.utility))
        y_ig.append(float(pe.ig))
        z_sup.append(float(clf.x_train_hup_[:, idx].sum()) / n_train)
        texts.append(label)

    fig = go.Figure(
        go.Scatter(
            x=x_util,
            y=y_ig,
            mode="markers",
            text=texts,
            hovertemplate="<b>%{text}</b><br>U=%{x:.4f} IG=%{y:.4f}<extra></extra>",
            marker=dict(
                color=z_sup,
                colorscale="Viridis",
                showscale=True,
                colorbar=dict(
                    thickness=11,
                    tickfont=dict(color=_T["muted"]),
                    title=dict(font=dict(color=_T["muted"]), text="Support"),
                ),
                line=dict(color=_T["border"], width=0.4),
                opacity=0.85,
                size=9,
            ),
        )
    )
    default_title = "Utility vs Info Gain" + (f" · {feature_filter}" if feature_filter else "")
    fig.update_layout(
        **_LAYOUT_BASE,
        title=_title(title or default_title),
        height=height or 430,
        margin=dict(l=44, r=20, t=82, b=40),
    )
    fig.update_xaxes(
        title_text="Utility",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text="Information Gain",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 5. Top Patterns
# ------------------------------------------------------------------
def plot_top_patterns(
    self,
    top_n: int = 20,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Horizontal bar chart of the top-N patterns by utility, coloured by IG.

    Labels longer than 40 characters are truncated with an ellipsis and each
    bar is annotated with the pattern's training support fraction.  When the
    classifier has no patterns an annotated empty figure is returned.

    Parameters
    ----------
    top_n : int
        Number of patterns to show.
    height : int, optional
        Figure height; by default it grows with the number of bars.
    title : str, optional
        Overrides the default ``"Top Patterns"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    n_train = clf.x_train_hup_.shape[0]
    chart_title = _title(title or "Top Patterns")

    rows = [
        {
            "label": label,
            "util": float(pe.utility),
            "ig": float(pe.ig),
            "sup": float(clf.x_train_hup_[:, idx].sum()) / n_train,
        }
        for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_))
    ]

    if not rows:
        # An empty DataFrame has no "util" column to sort on, so handle the
        # no-pattern case explicitly, like the other plot methods do.
        fig = go.Figure()
        fig.update_layout(**_LAYOUT_BASE, title=chart_title, height=height or self._h)
        fig.add_annotation(
            text="No patterns available.",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    # NOTE(review): the rows are reversed here AND the y-axis below uses
    # autorange="reversed" - confirm the resulting bar order is the intended one.
    df = pd.DataFrame(rows).sort_values("util", ascending=False).head(top_n)[::-1]
    short = [f"{lbl[:40]}…" if len(lbl) > 40 else lbl for lbl in df["label"]]

    fig = go.Figure(
        go.Bar(
            x=df["util"],
            y=short,
            orientation="h",
            marker=dict(
                color=df["ig"],
                colorscale=[[0, "#0d0887"], [0.5, "#bd3786"], [1, "#f0f921"]],
                showscale=True,
                colorbar=dict(
                    thickness=11,
                    tickfont=dict(color=_T["muted"]),
                    title=dict(font=dict(color=_T["muted"]), text="IG"),
                ),
                line=dict(color=_T["border"], width=0.3),
            ),
            text=[f"s={sup:.3f}" for sup in df["sup"]],
            textfont=dict(color=_T["muted"], size=9),
            textposition="outside",
            hovertemplate="<b>%{y}</b><br>Utility: %{x:.4f}<extra></extra>",
        )
    )
    fig.update_layout(
        **_LAYOUT_BASE,
        title=chart_title,
        height=height or max(280, len(df) * 24 + 80),
        margin=dict(l=44, r=20, t=76, b=40),
    )
    fig.update_xaxes(
        title_text="Utility",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        autorange="reversed",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 6. Feature Coverage
# ------------------------------------------------------------------
def plot_feature_coverage(
    self,
    top_n: int = 15,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Horizontal bar chart of how many patterns reference each feature.

    A feature is counted once for every ``", "``-separated label part that
    contains ``=``; the feature name is the text before the first ``=``.
    Only the *top_n* most frequently referenced features are shown.

    Parameters
    ----------
    top_n : int
        Number of features to show.
    height : int, optional
        Figure height; by default it grows with the number of bars.
    title : str, optional
        Overrides the default ``"Feature Coverage"`` title.

    Returns
    -------
    go.Figure
    """
    from collections import Counter

    feat_counts: Counter = Counter(
        part.split("=")[0]
        for label in self._clf.get_hug_features()
        for part in label.split(", ")
        if "=" in part
    )
    most_referenced = feat_counts.most_common(top_n)
    df = pd.DataFrame(most_referenced, columns=["feature", "count"]).iloc[::-1]

    bars = go.Bar(
        x=df["count"],
        y=df["feature"],
        orientation="h",
        marker=dict(color=_T["a2"], line=dict(color=_T["border"], width=0.3)),
        text=df["count"].astype(str),
        textposition="auto",
    )
    fig = go.Figure(bars)
    fig.update_layout(
        **_LAYOUT_BASE,
        title=_title(title or "Feature Coverage"),
        height=height or max(280, len(df) * 26 + 80),
        margin=dict(l=44, r=20, t=76, b=40),
    )
    fig.update_xaxes(
        title_text="# Patterns",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        autorange="reversed",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 7. Pattern Lengths
# ------------------------------------------------------------------
def plot_pattern_lengths(
    self,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Bar chart of how many patterns exist for each pattern length.

    Pattern length is ``len(pattern.items)``.  The bar for the shortest
    length is drawn in blue, all others in green.

    Parameters
    ----------
    height : int, optional
        Figure height; defaults to 280.
    title : str, optional
        Overrides the default ``"Pattern Lengths"`` title.

    Returns
    -------
    go.Figure
    """
    from collections import Counter

    length_counts = Counter(len(pe.items) for pe in self._clf.patterns_)
    ordered_lengths = sorted(length_counts)
    counts = [length_counts[length] for length in ordered_lengths]

    # First (shortest) length gets the primary accent, the rest the secondary.
    bar_colors = [_T["a2"]] * len(ordered_lengths)
    if bar_colors:
        bar_colors[0] = _T["a1"]

    bars = go.Bar(
        x=[f"Length {length}" for length in ordered_lengths],
        y=counts,
        marker=dict(color=bar_colors),
        text=[str(count) for count in counts],
        textposition="auto",
    )
    fig = go.Figure(bars)
    fig.update_layout(
        **_LAYOUT_BASE,
        title=_title(title or "Pattern Lengths"),
        height=height or 280,
    )
    fig.update_xaxes(
        title_text="Pattern Length",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text="Count",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 8. Support Distribution
# ------------------------------------------------------------------
def plot_support_distribution(
    self,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Histogram (25 bins) of the training support of every pattern.

    Support of pattern ``i`` is the sum of column ``i`` of
    ``clf.x_train_hup_`` divided by the number of training rows.

    Parameters
    ----------
    height : int, optional
        Figure height; defaults to 280.
    title : str, optional
        Overrides the default ``"Support Distribution"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    n_train = clf.x_train_hup_.shape[0]
    n_patterns = len(clf.patterns_)

    supports = []
    for col in range(n_patterns):
        column_total = float(clf.x_train_hup_[:, col].sum())
        supports.append(column_total / n_train)

    histogram = go.Histogram(
        x=supports,
        nbinsx=25,
        marker=dict(color=_T["a4"], line=dict(color=_T["border"], width=0.3)),
    )
    fig = go.Figure(histogram)
    fig.update_layout(
        **_LAYOUT_BASE,
        title=_title(title or "Support Distribution"),
        height=height or 280,
    )
    fig.update_xaxes(
        title_text="Support",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text="Count",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 9. Active-pattern local explanation
# ------------------------------------------------------------------
def plot_active_patterns(
    self,
    X: Any,
    sample_idx: int = 0,
    max_patterns: int = 20,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """Local explanation: the HUG patterns active for a single sample.

    The selected row is passed through ``clf.transform``; every non-zero
    column is an active pattern.  Active patterns are ranked by absolute
    coefficient (taken from ``clf.feature_importances()``), truncated to
    *max_patterns*, and drawn as horizontal bars - blue for coefficients
    >= 0, red for negative ones.

    Parameters
    ----------
    X : array-like or DataFrame
        Data containing the sample; DataFrames are indexed positionally.
    sample_idx : int
        Position of the sample in *X*; negative positions count from the end.
    max_patterns : int
        Maximum number of bars.
    height : int, optional
        Figure height; by default it grows with the number of bars.
    title : str, optional
        Overrides the default title (also in the "no active patterns" case).

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    if isinstance(X, pd.DataFrame):
        row = X.iloc[[sample_idx]]
    else:
        # For sample_idx == -1 the naive stop (sample_idx + 1 == 0) would
        # produce the empty slice X[-1:0]; an open-ended stop selects the
        # last row instead.
        stop = sample_idx + 1 or None
        row = X[sample_idx:stop]

    hup = clf.transform(row)
    # nonzero() yields (rows, cols) for a 2-D row and (cols,) for a 1-D row;
    # the last element holds the column indices in both layouts.
    active_cols = hup[0].nonzero()[-1].tolist()
    all_labels = clf.get_hug_features()

    # Best effort: if coefficients cannot be obtained, every pattern is shown
    # with coefficient 0.0 rather than failing the whole plot.
    try:
        imp = clf.feature_importances()
        coef_map = dict(zip(imp["pattern"], imp["coefficient"]))
    except Exception:
        coef_map = {}

    records = [
        {"label": all_labels[col_idx], "coef": coef_map.get(all_labels[col_idx], 0.0)}
        for col_idx in active_cols
        if col_idx < len(all_labels)
    ]
    records.sort(key=lambda r: abs(r["coef"]), reverse=True)
    # Reverse so the strongest pattern is the last (top-most) horizontal bar.
    records = records[:max_patterns][::-1]

    fig = go.Figure()
    if not records:
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or f"Active Patterns — sample #{sample_idx}"),
            height=height or self._h,
        )
        fig.add_annotation(
            text="No active patterns for this sample.",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    coefs = [r["coef"] for r in records]
    colors = [_T["a1"] if c >= 0 else _T["a3"] for c in coefs]
    labels = [f"{r['label'][:50]}…" if len(r["label"]) > 50 else r["label"] for r in records]
    fig.add_trace(
        go.Bar(
            x=coefs,
            y=labels,
            orientation="h",
            marker=dict(color=colors, line=dict(color=_T["border"], width=0.3)),
            hovertemplate="<b>%{y}</b><br>Coef: %{x:.4f}<extra></extra>",
        )
    )
    n_active = len(active_cols)
    fig.update_layout(
        **_LAYOUT_BASE,
        title=_title(title or f"Active Patterns — sample #{sample_idx} ({n_active} active)"),
        height=height or max(280, len(records) * 24 + 80),
    )
    fig.update_xaxes(
        title_text="Coefficient",
        tickfont=dict(color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        tickfont=dict(size=9, color=_T["muted"]), gridcolor=_T["grid"], linecolor=_T["border"]
    )
    return fig
# ------------------------------------------------------------------
# 10. Radar / performance summary (placeholder: no plot is defined for this section here)
# ------------------------------------------------------------------
# ------------------------------------------------------------------
# 11. 2-D HUG profile heatmap
# ------------------------------------------------------------------
def plot_2d_profile(
    self,
    feature_a: str,
    feature_b: str,
    height: int | None = None,
    title: str | None = None,
) -> go.Figure:
    """2-D HUG profile heatmap for patterns that involve two features.

    Every pattern whose label contains both ``"<feature_a>=..."`` and
    ``"<feature_b>=..."`` as whole label parts adds its utility to the cell
    (bin of *feature_a*, bin of *feature_b*).  Bins are ordered by numeric
    lower bound where one can be parsed, otherwise alphabetically.

    Parameters
    ----------
    feature_a, feature_b : str
        Features shown on the y-axis and x-axis respectively.
    height : int, optional
        Figure height; defaults to the plotter's ``height_default``.
    title : str, optional
        Overrides the default ``"2-D HUG Profile · <a> × <b>"`` title.

    Returns
    -------
    go.Figure
    """
    clf = self._clf
    chart_title = _title(title or f"2-D HUG Profile · {feature_a} × {feature_b}")

    def _bin(label: str, feat: str) -> str | None:
        """Return the bin text of *feat* inside *label*, or None if absent."""
        prefix = feat + "="
        for part in label.split(", "):
            part = part.strip()
            # startswith on each part avoids matching "age" in "mortgage_age=...".
            if part.startswith(prefix):
                return part[len(prefix):]
        return None

    def _bin_order(bin_label: str) -> tuple:
        """Sort key: numeric lower bound first, unparsable labels afterwards."""
        lo = _parse_bin_lower(bin_label)
        return (lo is None, 0.0 if lo is None else lo, bin_label)

    # (bin of feature_a, bin of feature_b, utility) for each matching pattern.
    compound = []
    for lbl, pe in zip(clf.get_hug_features(), clf.patterns_):
        bin_a, bin_b = _bin(lbl, feature_a), _bin(lbl, feature_b)
        if bin_a is not None and bin_b is not None:
            compound.append((bin_a, bin_b, float(pe.utility)))

    fig = go.Figure()
    if not compound:
        fig.update_layout(**_LAYOUT_BASE, title=chart_title, height=height or self._h)
        fig.add_annotation(
            # Plotly renders "<br>" as a line break; "\n" is not rendered as one.
            text=f"No compound patterns for '{feature_a}' × '{feature_b}'.<br>Try L≥2 and lower G.",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    bins_a = sorted({a for a, _, _ in compound}, key=_bin_order)
    bins_b = sorted({b for _, b, _ in compound}, key=_bin_order)
    idx_a = {b: i for i, b in enumerate(bins_a)}
    idx_b = {b: i for i, b in enumerate(bins_b)}

    # Several patterns may fall into the same cell; their utilities are summed.
    grid = np.zeros((len(bins_a), len(bins_b)))
    for bin_a, bin_b, utility in compound:
        grid[idx_a[bin_a], idx_b[bin_b]] += utility

    fig.add_trace(
        go.Heatmap(
            z=grid,
            x=bins_b,
            y=bins_a,
            colorscale="RdBu_r",
            zmid=0,
            colorbar=dict(
                tickfont=dict(color=_T["muted"]),
                title=dict(font=dict(color=_T["muted"]), text="aggregate utility"),
            ),
        )
    )
    fig.update_layout(**_LAYOUT_BASE, title=chart_title, height=height or self._h)
    fig.update_xaxes(
        title_text=feature_b,
        tickangle=-35,
        tickfont=dict(size=9, color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    fig.update_yaxes(
        title_text=feature_a,
        tickfont=dict(size=9, color=_T["muted"]),
        gridcolor=_T["grid"],
        linecolor=_T["border"],
    )
    return fig
# ------------------------------------------------------------------
# 12. Full HTML dashboard
# ------------------------------------------------------------------
def plot_dashboard(
    self,
    X: Any,
    dataset_name: str = "Dataset",
    feature_names_for_profile: list[str] | None = None,
    output_path: str | None = None,
) -> str:
    """Generate a self-contained multi-panel HTML dashboard.

    The page contains, two cards per row: feature importance, top patterns,
    utility-vs-IG, feature coverage, pattern lengths, support distribution,
    and - for each profiled feature - its marginal bin profile and its
    feature-combination chart.  The plotly.js bundle is embedded once (with
    the first figure) so the file works without a network connection.

    Parameters
    ----------
    X : array-like or DataFrame
        Currently unused by this method; kept for API compatibility.
    dataset_name : str
        Shown in the page title and heading (HTML-escaped).
    feature_names_for_profile : list of str, optional
        Features to add per-feature charts for.  Defaults to the first 12
        (alphabetically) features that have singleton patterns.
    output_path : str, optional
        If given, the HTML is also written to this path as UTF-8.

    Returns
    -------
    str
        The complete HTML document.
    """
    from html import escape

    import plotly.io as pio

    clf = self._clf
    all_labels = clf.get_hug_features()

    # Features that own at least one singleton pattern.
    singleton_feats = sorted(
        {
            lbl.split(", ")[0].split("=")[0]
            for lbl, pe in zip(all_labels, clf.patterns_)
            if len(pe.items) == 1
        }
    )
    if feature_names_for_profile is None:
        feature_names_for_profile = singleton_feats[:12]  # limit for dashboard

    # Global figures first, then two figures per profiled feature.
    figs = [
        self.plot_feature_importance(top_n=10),
        self.plot_top_patterns(top_n=15),
        self.plot_utility_vs_ig(),
        self.plot_feature_coverage(top_n=10),
        self.plot_pattern_lengths(),
        self.plot_support_distribution(),
    ]
    for fname in feature_names_for_profile:
        figs.append(self.plot_marginal_bin_profile(fname))
        figs.append(self.plot_feature_combinations(fname, top_n=15))

    # dataset_name is caller-supplied text placed into markup; escape it.
    safe_name = escape(str(dataset_name))
    html_parts = [
        "<!DOCTYPE html><html><head>",
        "<meta charset='utf-8'>",
        f"<title>HUG-IML Dashboard — {safe_name}</title>",
        "<style>",
        f"body{{background:{_T['bg']};color:{_T['text']};font-family:{_T['font_b']};margin:0;padding:16px;}}",
        f"h1{{font-family:{_T['font_t']};color:{_T['a1']};}}",
        # Plain string (no f-string / format), so braces must be single here;
        # doubled braces would be emitted literally and break the CSS rule.
        ".grid-2{display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:14px;}",
        f".card{{background:{_T['panel']};border:1px solid {_T['border']};border-radius:8px;overflow:hidden;}}",
        "</style></head><body>",
        f"<h1>HUG-IML Governance Dashboard — {safe_name}</h1>",
        "<div class='grid-2'>",
    ]
    for i, fig in enumerate(figs):
        # Embed plotly.js only with the first figure; later ones reuse it.
        fig_html = pio.to_html(fig, full_html=False, include_plotlyjs=(i == 0))
        html_parts.append(f"<div class='card'>{fig_html}</div>")
        # Start a new two-column row after every second card (except at the end).
        if (i + 1) % 2 == 0 and i + 1 < len(figs):
            html_parts.append("</div><div class='grid-2'>")
    html_parts.append("</div></body></html>")

    page = "\n".join(html_parts)
    if output_path:
        with open(output_path, "w", encoding="utf-8") as fh:
            fh.write(page)
    return page
# ------------------------------------------------------------------
def _validate_fitted(self) -> None:
    """Raise ``RuntimeError`` unless the wrapped classifier exposes ``patterns_``."""
    if hasattr(self._clf, "patterns_"):
        return
    raise RuntimeError("Classifier must be fitted before creating a HUGPlotter.")