# Source code for hugiml.plots

# Copyright 2026 Srikumar Krishnamoorthy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
HUG-IML first-class visualizations using Plotly.

Public API
----------
    from hugiml.plots import HUGPlotter

    plotter = HUGPlotter(clf)
    fig = plotter.plot_marginal_bin_profile("age", X)   # EBM shape-function equivalent
    fig = plotter.plot_feature_combinations("age")       # compound patterns for one feature
    fig = plotter.plot_feature_importance(top_n=15)
    fig = plotter.plot_utility_vs_ig()                   # scatter: utility × IG × support
    fig = plotter.plot_top_patterns(top_n=20)
    fig = plotter.plot_feature_coverage()
    fig = plotter.plot_pattern_lengths()
    fig = plotter.plot_support_distribution()
    fig = plotter.plot_active_patterns(X, sample_idx=0) # local explanation
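    fig = plotter.plot_performance_radar(metrics)       # radar of accuracy / AUC / F1
    fig = plotter.plot_2d_profile("age", "income")       # heatmap for a feature pair
    html = plotter.plot_dashboard(X)                     # full multi-panel HTML (returns str)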
"""

from __future__ import annotations

from typing import Any

import numpy as np
import pandas as pd

try:
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    _PLOTLY = True
except ImportError:
    _PLOTLY = False


__all__ = [
    "HUGPlotter",
]

# ---------------------------------------------------------------------------
# Design tokens (matching the governance dashboard)
# ---------------------------------------------------------------------------

# Colour and font palette shared by every chart and by the HTML dashboard.
_T = {
    "bg": "#0d1117",  # figure (paper) background and dashboard page background
    "panel": "#161b22",  # plot area / dashboard card background
    "border": "#30363d",  # axis lines, bar outlines, card borders
    "grid": "#21262d",  # grid lines
    "text": "#e6edf3",  # default text colour
    "muted": "#8b949e",  # tick labels, annotations, legend text
    "a1": "#58a6ff",  # blue  – chart titles / positive coefficients / first length bar
    "a2": "#3fb950",  # green – combinations +1 / feature coverage bars
    "a3": "#f78166",  # red   – combinations +3 / negative coefficients
    "a4": "#d2a8ff",  # purple– support dist histogram / combinations with >3 extras
    "a5": "#ffa657",  # orange– support line overlay / combinations +2
    "font_b": "JetBrains Mono, monospace",  # body font
    "font_t": "Syne, sans-serif",  # title font
}

# Base layout applied to every figure via ``update_layout(**_LAYOUT_BASE, ...)``.
# Plotly 6 rejects duplicate kwargs in update_layout(**_LAYOUT_BASE, margin=...).
# margin and legend are therefore kept out of this dict so that a chart can pass
# its own ``margin=`` / ``legend=`` next to ``**_LAYOUT_BASE`` without a clash.
_LAYOUT_BASE = dict(
    paper_bgcolor=_T["bg"],
    plot_bgcolor=_T["panel"],
    font=dict(color=_T["text"], family=_T["font_b"], size=11),
    xaxis=dict(tickfont=dict(color=_T["muted"]), gridcolor=_T["grid"], linecolor=_T["border"]),
    yaxis=dict(tickfont=dict(color=_T["muted"]), gridcolor=_T["grid"], linecolor=_T["border"]),
)
# Default margin and legend styling.
# NOTE(review): neither dict is referenced by the code visible in this module —
# the charts spell out their own margin/legend values; confirm whether these are
# still needed.
_LAYOUT_MARGIN = dict(l=44, r=20, t=50, b=40)
_LAYOUT_LEGEND = dict(
    font=dict(color=_T["muted"]), bgcolor="rgba(0,0,0,0)", bordercolor=_T["border"]
)


def _title(text: str) -> dict:
    """Wrap *text* in a Plotly title dict using the themed title colour and font."""
    title_font = {"color": _T["a1"], "size": 13, "family": _T["font_t"]}
    return {"text": text, "font": title_font}


def _require_plotly() -> None:
    """Raise ImportError if the optional plotly import at module load failed."""
    if _PLOTLY:
        return
    raise ImportError("plotly is required for HUGPlotter. Install with: pip install plotly")


# ---------------------------------------------------------------------------
# HUGPlotter
# ---------------------------------------------------------------------------


def _parse_bin_lower(bin_label: str) -> float | None:
    """Parse the lower bound from a bin label like '[7.69,11.41]' or '[11.8,17.5)'.

    Returns None for non-numeric labels (categorical values without brackets).
    """
    if bin_label.startswith("["):
        try:
            return float(bin_label[1:].split(",")[0])
        except (ValueError, IndexError):
            pass
    return None


def _get_bin_edges(clf, feature_name: str):
    """Return real-value bin edges for *feature_name*, or None if unavailable.

    Works for both HUGIMLAdaptive (uses _bin_edges_) and native
    HUGIMLClassifierNative (reconstructs from td_._cpp_all_edges).
    """
    # HUGIMLAdaptive path — edges stored directly
    if hasattr(clf, "_bin_edges_") and feature_name in clf._bin_edges_:
        return np.array(clf._bin_edges_[feature_name])

    # Native clf path — reconstruct from normalised C++ edges
    feature_names = getattr(clf, "feature_names_in_", None)
    if feature_names is None:
        return None
    try:
        j = list(feature_names).index(feature_name)
    except ValueError:
        return None
    td = getattr(clf, "td_", None)
    if td is None:
        return None
    all_edges = getattr(td, "_cpp_all_edges", None)
    if all_edges is None or j >= len(all_edges):
        return None
    edges_norm = np.array(all_edges[j])
    if len(edges_norm) < 2:
        return None
    col_min_arr = getattr(td, "_cpp_col_min", None)
    col_range_arr = getattr(td, "_cpp_col_range", None)
    if col_min_arr is None or col_range_arr is None:
        return None
    col_min = float(col_min_arr[j])
    col_range = float(col_range_arr[j])
    return edges_norm * col_range + col_min


class HUGPlotter:
    """Unified Plotly-based visualization interface for a fitted HUGIMLClassifierNative.

    Every ``plot_*`` method returns a ``plotly.graph_objects.Figure`` styled
    with the module's dark theme, except :meth:`plot_dashboard`, which returns
    an HTML string.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
        Must expose ``patterns_``. The plots additionally read
        ``get_hug_features()`` and ``x_train_hup_`` from it.
    height_default : int
        Default figure height.

    Raises
    ------
    ImportError
        If plotly is not installed.
    RuntimeError
        If *clf* has no ``patterns_`` attribute.
    """

    # Relative tolerance used when matching a mined bin's lower bound to an edge.
    _EDGE_MATCH_RTOL = 0.02

    def __init__(self, clf: Any, height_default: int = 380) -> None:
        _require_plotly()
        self._clf = clf
        self._h = height_default
        self._validate_fitted()

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _has_feature(label: str, feature_name: str) -> bool:
        """Return True if one part of a pattern label is ``feature_name=...``.

        Labels look like ``"feat1=[lo,hi], feat2=val"``. Each part is tested
        with ``startswith`` so that "age" does not match "mortgage_age=...".
        """
        prefix = feature_name + "="
        return any(part.strip().startswith(prefix) for part in label.split(", "))

    @staticmethod
    def _truncate(label: str, limit: int) -> str:
        """Shorten *label* to *limit* characters plus an ellipsis when longer."""
        return f"{label[:limit]}…" if len(label) > limit else label

    @staticmethod
    def _axis_style(**overrides: Any) -> dict:
        """Return the themed axis kwargs, updated with *overrides*."""
        style = dict(
            tickfont=dict(color=_T["muted"]),
            gridcolor=_T["grid"],
            linecolor=_T["border"],
        )
        style.update(overrides)
        return style

    @staticmethod
    def _bin_sort_key(bin_label: str) -> tuple:
        """Sort key: bracketed numeric bins by lower bound, other labels after them."""
        lower = _parse_bin_lower(bin_label)
        if lower is None:
            return (1, 0.0, bin_label)
        return (0, lower, bin_label)

    def _support_fraction(self, idx: int) -> float:
        """Fraction of training rows whose column *idx* of ``x_train_hup_`` is set."""
        hup = self._clf.x_train_hup_
        return float(hup[:, idx].sum()) / hup.shape[0]

    def _empty_figure(self, title_text: str, message: str, height: int | None) -> go.Figure:
        """Build a themed placeholder figure showing *message* in its centre."""
        fig = go.Figure()
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title_text),
            height=height or self._h,
        )
        fig.add_annotation(
            text=message,
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(color=_T["muted"]),
        )
        return fig

    # ------------------------------------------------------------------
    # 1. Marginal Bin Profile  (= EBM per-feature shape function)
    # ------------------------------------------------------------------

    def plot_marginal_bin_profile(
        self,
        feature_name: str,
        X: Any | None = None,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """1-D HUG profile — EBM shape function equivalent.

        For a given feature, shows every singleton pattern bin as a bar
        (x = bin label, y = utility, colour = information gain). An orange
        dotted line overlays the training support percentage on the right
        y-axis, mirroring the dashboard's "Marginal Bin Profile" card.

        When bin edges are available for the feature, every bin is drawn and
        bins without a mined pattern appear as zero-height grey bars.
        Otherwise only mined bins are drawn: numeric bins ordered by lower
        bound, followed by non-numeric (categorical) values.

        Parameters
        ----------
        feature_name : str
        X : ignored
            Support uses training data stored in ``clf.x_train_hup_``.
        height : int, optional
        title : str, optional

        Returns
        -------
        plotly.graph_objects.Figure
        """
        clf = self._clf
        heading = title or f"Marginal Bin Profile · {feature_name}"
        prefix = feature_name + "="

        # ── Step 1: collect all singleton patterns for this feature ─────────
        # Numeric bins are keyed by their lower bound so they can be matched to
        # edge-derived bins regardless of minor float formatting differences.
        # Non-numeric bins are kept in a list: keying them all on one sentinel
        # would let each value overwrite the previous one.
        numeric: dict[float, tuple[str, float, float, float]] = {}
        categorical: list[tuple[str, float, float, float]] = []
        for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
            if len(pe.items) != 1:
                continue
            if ", " in label or not label.startswith(prefix):
                continue
            bin_label = label[len(prefix) :]
            entry = (
                bin_label,
                float(pe.utility),
                float(pe.ig),
                self._support_fraction(idx) * 100,
            )
            lower = _parse_bin_lower(bin_label)
            if lower is None:
                categorical.append(entry)
            else:
                numeric[lower] = entry

        # ── Step 2: build the dense ordered bin sequence ────────────────────
        # Each element is (label, utility, ig, support %, was_mined).
        real_edges = _get_bin_edges(clf, feature_name)
        if real_edges is not None and len(real_edges) >= 2:
            bins = []
            for lo_edge, hi_edge in zip(real_edges[:-1], real_edges[1:]):
                lo_val = float(lo_edge)
                hi_val = float(hi_edge)
                # First mined bin whose lower bound is within the relative
                # tolerance of this edge.
                match = next(
                    (
                        data
                        for key, data in numeric.items()
                        if abs(key - lo_val) / (abs(lo_val) + 1e-9) < self._EDGE_MATCH_RTOL
                    ),
                    None,
                )
                if match is not None:
                    bins.append((*match, True))
                else:
                    # Readable label for the un-mined bin, drawn at zero height.
                    bins.append((f"[{lo_val:.4g},{hi_val:.4g}]", 0.0, 0.0, 0.0, False))
        else:
            # Fallback: only mined bins.
            if not numeric and not categorical:
                return self._empty_figure(
                    heading,
                    f"No singleton patterns for '{feature_name}'. "
                    "Try reducing G or increasing topK.",
                    height,
                )
            ordered = [numeric[key] for key in sorted(numeric)] + categorical
            bins = [(*entry, True) for entry in ordered]

        bins_x = [b[0] for b in bins]
        bins_util = [b[1] for b in bins]
        bins_ig = [b[2] for b in bins]
        bins_sup = [b[3] for b in bins]
        bins_mined = [b[4] for b in bins]

        n_mined = sum(bins_mined)
        n_total = len(bins_x)
        subtitle = (
            f"{n_mined}/{n_total} bins have patterns"
            if n_mined < n_total
            else f"All {n_total} bins have patterns"
        )

        # ── Step 3: colours — IG intensity for mined bins, grey for empty ───
        max_ig = max(bins_ig) if any(bins_ig) else 1
        colors = []
        for ig, is_mined in zip(bins_ig, bins_mined):
            if is_mined:
                alpha = 0.4 + 0.6 * ig / (max_ig + 1e-9)
                colors.append(f"rgba(88,166,255,{alpha:.2f})")
            else:
                colors.append("rgba(100,100,100,0.25)")  # grey — no pattern mined

        fig = make_subplots(specs=[[{"secondary_y": True}]])
        fig.add_trace(
            go.Bar(
                x=bins_x,
                y=bins_util,
                name="Utility",
                marker=dict(color=colors, line=dict(color=_T["border"], width=0.5)),
                hovertemplate="<b>%{x}</b><br>Utility: %{y:.4f}<extra></extra>",
            ),
            secondary_y=False,
        )
        fig.add_trace(
            go.Scatter(
                x=bins_x,
                y=bins_sup,
                name="Support %",
                mode="lines+markers",
                line=dict(color=_T["a5"], dash="dot", width=2.5),
                marker=dict(color=_T["a5"], size=7),
                hovertemplate="Support: %{y:.1f}%<extra></extra>",
            ),
            secondary_y=True,
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(heading),
            height=height or self._h,
            annotations=[
                dict(
                    text=subtitle,
                    xref="paper",
                    yref="paper",
                    x=0.99,
                    y=0.99,
                    showarrow=False,
                    xanchor="right",
                    font=dict(size=10, color=_T["muted"]),
                )
            ],
        )
        fig.update_layout(
            legend=dict(
                x=0.01,
                y=0.99,
                font=dict(color=_T["muted"]),
                bgcolor="rgba(0,0,0,0)",
                bordercolor=_T["border"],
            )
        )
        fig.update_xaxes(
            **self._axis_style(
                title_text="Bin Range",
                tickangle=-35,
                tickfont=dict(size=9, color=_T["muted"]),
            )
        )
        fig.update_yaxes(
            **self._axis_style(
                title_text="Utility",
                secondary_y=False,
                title_font=dict(color=_T["a1"]),
            )
        )
        fig.update_yaxes(
            title_text="Support %",
            secondary_y=True,
            title_font=dict(color=_T["a5"]),
            showgrid=False,
            tickfont=dict(color=_T["a5"]),
        )
        return fig

    # ------------------------------------------------------------------
    # 2. Feature Combinations
    # ------------------------------------------------------------------

    def plot_feature_combinations(
        self,
        feature_name: str,
        top_n: int = 25,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Compound patterns that include a specific feature.

        Each bar = one compound pattern; bars coloured by the number of extra
        features (+1 = green, +2 = orange, +3 = red, more = purple), matching
        the dashboard's "Feature Combinations" card.

        Parameters
        ----------
        feature_name : str
        top_n : int
            Keep the *top_n* patterns with the highest utility.
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf
        heading = title or f"Feature Combinations · {feature_name}"

        rows = []
        for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
            if len(pe.items) < 2 or not self._has_feature(label, feature_name):
                continue
            rows.append(
                {
                    "label": label,
                    "util": float(pe.utility),
                    "extra": len(pe.items) - 1,
                    "sup": self._support_fraction(idx) * 100,
                }
            )

        if not rows:
            return self._empty_figure(
                heading, f"No compound patterns for '{feature_name}'", height
            )

        df = pd.DataFrame(rows).sort_values("util", ascending=False).head(top_n)[::-1]

        # Colour by extra-feature count.
        color_map = {1: _T["a2"], 2: _T["a5"], 3: _T["a3"]}
        colors = [color_map.get(extra, _T["a4"]) for extra in df["extra"]]

        fig = go.Figure()
        fig.add_trace(
            go.Bar(
                x=df["util"],
                y=[self._truncate(label, 45) for label in df["label"]],
                orientation="h",
                marker=dict(color=colors, line=dict(color=_T["border"], width=0.3)),
                text=[f"supp={sup:.1f}%" for sup in df["sup"]],
                textfont=dict(color=_T["muted"], size=8),
                textposition="outside",
                hovertemplate="<b>%{y}</b><br>Utility: %{x:.4f}<extra></extra>",
            )
        )
        # Legend annotation
        fig.add_annotation(
            text=(
                f"<span style='color:{_T['a2']}'>■</span> +1  "
                f"<span style='color:{_T['a5']}'>■</span> +2  "
                f"<span style='color:{_T['a3']}'>■</span> +3 extra features"
            ),
            x=1.01,
            xanchor="right",
            xref="paper",
            y=1.06,
            yref="paper",
            showarrow=False,
            font=dict(color=_T["muted"], size=9),
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(heading),
            height=max(320, len(df) * 22 + 80) if height is None else height,
        )
        fig.update_xaxes(**self._axis_style(title_text="Utility"))
        fig.update_yaxes(
            **self._axis_style(
                tickfont=dict(size=9, color=_T["muted"]),
                autorange="reversed",
            )
        )
        return fig

    # ------------------------------------------------------------------
    # 3. Feature Importance
    # ------------------------------------------------------------------

    def plot_feature_importance(
        self,
        top_n: int = 15,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Feature importance: mean utility per feature, coloured by mean IG.

        A feature's statistics are averaged over every pattern whose label
        mentions it. Matches the "Feature Importance" card in the governance
        dashboard. Returns a placeholder figure when there are no patterns.

        Parameters
        ----------
        top_n : int
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf
        heading = title or "Feature Importance"

        stats: dict[str, dict[str, list[float]]] = {}
        for label, pe in zip(clf.get_hug_features(), clf.patterns_):
            for part in label.split(", "):
                if "=" not in part:
                    continue
                entry = stats.setdefault(part.split("=")[0], {"utils": [], "igs": []})
                entry["utils"].append(float(pe.utility))
                entry["igs"].append(float(pe.ig))

        if not stats:
            # sort_values() on an empty frame would raise KeyError.
            return self._empty_figure(heading, "No patterns available.", height)

        rows = [
            {
                "feature": fname,
                "mean_util": float(np.mean(values["utils"])),
                "mean_ig": float(np.mean(values["igs"])),
                "n": len(values["utils"]),
            }
            for fname, values in stats.items()
        ]
        df = pd.DataFrame(rows).sort_values("mean_util", ascending=False).head(top_n)[::-1]

        fig = go.Figure(
            go.Bar(
                x=df["mean_util"],
                y=df["feature"],
                orientation="h",
                marker=dict(
                    color=df["mean_ig"],
                    colorscale=[[0, "#30123b"], [0.5, "#1bcfd4"], [1, "#7a0402"]],
                    showscale=True,
                    colorbar=dict(
                        thickness=11,
                        tickfont=dict(color=_T["muted"]),
                        title=dict(font=dict(color=_T["muted"]), text="Mean IG"),
                    ),
                    line=dict(color=_T["border"], width=0.3),
                ),
                text=[f"n={count}" for count in df["n"]],
                textfont=dict(color=_T["muted"], size=9),
                textposition="outside",
                hovertemplate="<b>%{y}</b><br>Mean U: %{x:.4f}<extra></extra>",
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(heading),
            height=height or max(280, len(df) * 28 + 80),
            margin=dict(l=44, r=20, t=76, b=40),
        )
        fig.update_xaxes(**self._axis_style(title_text="Mean Utility"))
        fig.update_yaxes(**self._axis_style(autorange="reversed"))
        return fig

    # ------------------------------------------------------------------
    # 4. Utility vs Information Gain scatter
    # ------------------------------------------------------------------

    def plot_utility_vs_ig(
        self,
        feature_filter: str | None = None,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Scatter: utility (x) × information gain (y), coloured by support.

        Matches the "Utility vs Info Gain" card in the governance dashboard.
        Optionally filter to patterns containing one feature.

        Parameters
        ----------
        feature_filter : str, optional
            If given, only patterns that reference exactly this feature name
            are plotted.
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf

        x_util, y_ig, z_sup, texts = [], [], [], []
        for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_)):
            # Match on whole label parts, not substrings, so that filtering on
            # "age" does not pull in "mortgage_age=...".
            if feature_filter and not self._has_feature(label, feature_filter):
                continue
            x_util.append(float(pe.utility))
            y_ig.append(float(pe.ig))
            z_sup.append(self._support_fraction(idx))
            texts.append(label)

        fig = go.Figure(
            go.Scatter(
                x=x_util,
                y=y_ig,
                mode="markers",
                text=texts,
                hovertemplate="<b>%{text}</b><br>U=%{x:.4f}  IG=%{y:.4f}<extra></extra>",
                marker=dict(
                    color=z_sup,
                    colorscale="Viridis",
                    showscale=True,
                    colorbar=dict(
                        thickness=11,
                        tickfont=dict(color=_T["muted"]),
                        title=dict(font=dict(color=_T["muted"]), text="Support"),
                    ),
                    line=dict(color=_T["border"], width=0.4),
                    opacity=0.85,
                    size=9,
                ),
            )
        )
        default_heading = "Utility vs Info Gain" + (
            f" · {feature_filter}" if feature_filter else ""
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or default_heading),
            height=height or 430,
            margin=dict(l=44, r=20, t=82, b=40),
        )
        fig.update_xaxes(**self._axis_style(title_text="Utility"))
        fig.update_yaxes(**self._axis_style(title_text="Information Gain"))
        return fig

    # ------------------------------------------------------------------
    # 5. Top Patterns
    # ------------------------------------------------------------------

    def plot_top_patterns(
        self,
        top_n: int = 20,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Horizontal bar chart of top-N patterns by utility, coloured by IG.

        Matches the "Top Patterns" card in the governance dashboard. Returns a
        placeholder figure when there are no patterns.

        Parameters
        ----------
        top_n : int
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf
        heading = title or "Top Patterns"

        rows = [
            {
                "label": label,
                "util": float(pe.utility),
                "ig": float(pe.ig),
                "sup": self._support_fraction(idx),
            }
            for idx, (label, pe) in enumerate(zip(clf.get_hug_features(), clf.patterns_))
        ]
        if not rows:
            # sort_values() on an empty frame would raise KeyError.
            return self._empty_figure(heading, "No patterns available.", height)

        # NOTE(review): the rows are reversed here and the y-axis is reversed
        # again below (the other ranked bar charts do the same), which appears
        # to place the highest-utility bar at the bottom — confirm this is the
        # intended ordering.
        df = pd.DataFrame(rows).sort_values("util", ascending=False).head(top_n)[::-1]

        fig = go.Figure(
            go.Bar(
                x=df["util"],
                y=[self._truncate(label, 40) for label in df["label"]],
                orientation="h",
                marker=dict(
                    color=df["ig"],
                    colorscale=[[0, "#0d0887"], [0.5, "#bd3786"], [1, "#f0f921"]],
                    showscale=True,
                    colorbar=dict(
                        thickness=11,
                        tickfont=dict(color=_T["muted"]),
                        title=dict(font=dict(color=_T["muted"]), text="IG"),
                    ),
                    line=dict(color=_T["border"], width=0.3),
                ),
                text=[f"s={sup:.3f}" for sup in df["sup"]],
                textfont=dict(color=_T["muted"], size=9),
                textposition="outside",
                hovertemplate="<b>%{y}</b><br>Utility: %{x:.4f}<extra></extra>",
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(heading),
            height=height or max(280, len(df) * 24 + 80),
            margin=dict(l=44, r=20, t=76, b=40),
        )
        fig.update_xaxes(**self._axis_style(title_text="Utility"))
        fig.update_yaxes(**self._axis_style(autorange="reversed"))
        return fig

    # ------------------------------------------------------------------
    # 6. Feature Coverage
    # ------------------------------------------------------------------

    def plot_feature_coverage(
        self,
        top_n: int = 15,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Horizontal bar: how many patterns reference each feature.

        Matches the "Feature Coverage" card in the governance dashboard.

        Parameters
        ----------
        top_n : int
            Number of most frequently referenced features to show.
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        from collections import Counter

        feat_counts: Counter = Counter(
            part.split("=")[0]
            for label in self._clf.get_hug_features()
            for part in label.split(", ")
            if "=" in part
        )

        df = pd.DataFrame(feat_counts.most_common(top_n), columns=["feature", "count"]).iloc[::-1]

        fig = go.Figure(
            go.Bar(
                x=df["count"],
                y=df["feature"],
                orientation="h",
                marker=dict(color=_T["a2"], line=dict(color=_T["border"], width=0.3)),
                text=df["count"].astype(str),
                textposition="auto",
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or "Feature Coverage"),
            height=height or max(280, len(df) * 26 + 80),
            margin=dict(l=44, r=20, t=76, b=40),
        )
        fig.update_xaxes(**self._axis_style(title_text="# Patterns"))
        fig.update_yaxes(**self._axis_style(autorange="reversed"))
        return fig

    # ------------------------------------------------------------------
    # 7. Pattern Lengths
    # ------------------------------------------------------------------

    def plot_pattern_lengths(
        self,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Bar chart of pattern length distribution.

        Matches the "Pattern Lengths" card in the governance dashboard. The
        first (shortest) length is drawn in blue, the remaining ones in green.

        Parameters
        ----------
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        from collections import Counter

        lengths = Counter(len(pe.items) for pe in self._clf.patterns_)
        xs = sorted(lengths)
        colors = [_T["a2"] if pos else _T["a1"] for pos, _ in enumerate(xs)]

        fig = go.Figure(
            go.Bar(
                x=[f"Length {x}" for x in xs],
                y=[lengths[x] for x in xs],
                marker=dict(color=colors),
                text=[str(lengths[x]) for x in xs],
                textposition="auto",
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or "Pattern Lengths"),
            height=height or 280,
        )
        fig.update_xaxes(**self._axis_style(title_text="Pattern Length"))
        fig.update_yaxes(**self._axis_style(title_text="Count"))
        return fig

    # ------------------------------------------------------------------
    # 8. Support Distribution
    # ------------------------------------------------------------------

    def plot_support_distribution(
        self,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Histogram of pattern support values.

        Matches the "Support Distribution" card in the governance dashboard.

        Parameters
        ----------
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        supports = [self._support_fraction(i) for i in range(len(self._clf.patterns_))]

        fig = go.Figure(
            go.Histogram(
                x=supports,
                nbinsx=25,
                marker=dict(color=_T["a4"], line=dict(color=_T["border"], width=0.3)),
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or "Support Distribution"),
            height=height or 280,
        )
        fig.update_xaxes(**self._axis_style(title_text="Support"))
        fig.update_yaxes(**self._axis_style(title_text="Count"))
        return fig

    # ------------------------------------------------------------------
    # 9. Active-pattern local explanation
    # ------------------------------------------------------------------

    def plot_active_patterns(
        self,
        X: Any,
        sample_idx: int = 0,
        max_patterns: int = 20,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """Local explanation: active HUG patterns for a single sample.

        Shows active patterns sorted by absolute coefficient magnitude,
        coloured blue for positive coefficients and red for negative
        coefficients. Coefficients come from ``clf.feature_importances()``;
        when that call fails every coefficient is shown as 0.

        Parameters
        ----------
        X : array-like or DataFrame
        sample_idx : int
            Positional index of the sample within *X*.
        max_patterns : int
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf
        if isinstance(X, pd.DataFrame):
            row = X.iloc[[sample_idx]]
        else:
            row = X[sample_idx : sample_idx + 1]
        hup = clf.transform(row)
        # assumes transform() returns a 2-D sparse matrix, so that hup[0] stays
        # 2-D and nonzero() yields (rows, cols) — TODO confirm
        active_cols = hup[0].nonzero()[1].tolist()
        all_labels = clf.get_hug_features()

        # Best effort: the chart is still useful without coefficients, so a
        # failing feature_importances() deliberately degrades to all zeros.
        try:
            imp = clf.feature_importances()
            coef_map = dict(zip(imp["pattern"], imp["coefficient"]))
        except Exception:
            coef_map = {}

        records = [
            {"label": all_labels[col], "coef": coef_map.get(all_labels[col], 0.0)}
            for col in active_cols
            if col < len(all_labels)
        ]
        records.sort(key=lambda rec: abs(rec["coef"]), reverse=True)
        # Reversed so the largest coefficient is listed last in the trace.
        records = records[:max_patterns][::-1]

        if not records:
            return self._empty_figure(
                title or f"Active Patterns — sample #{sample_idx}",
                "No active patterns for this sample.",
                height,
            )

        coefs = [rec["coef"] for rec in records]
        colors = [_T["a1"] if coef >= 0 else _T["a3"] for coef in coefs]
        labels = [self._truncate(rec["label"], 50) for rec in records]

        fig = go.Figure()
        fig.add_trace(
            go.Bar(
                x=coefs,
                y=labels,
                orientation="h",
                marker=dict(color=colors, line=dict(color=_T["border"], width=0.3)),
                hovertemplate="<b>%{y}</b><br>Coef: %{x:.4f}<extra></extra>",
            )
        )
        n_active = len(active_cols)
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(title or f"Active Patterns — sample #{sample_idx} ({n_active} active)"),
            height=height or max(280, len(records) * 24 + 80),
        )
        fig.update_xaxes(**self._axis_style(title_text="Coefficient"))
        fig.update_yaxes(**self._axis_style(tickfont=dict(size=9, color=_T["muted"])))
        return fig

    # ------------------------------------------------------------------
    # 10. Radar / performance summary
    # ------------------------------------------------------------------

    def plot_performance_radar(
        self,
        metrics: dict,
        dataset_name: str = "Dataset",
        height: int | None = None,
    ) -> go.Figure:
        """Radar / spider chart of classification performance metrics.

        Matches the "Performance" card in the governance dashboard.

        Parameters
        ----------
        metrics : dict
            Keys: 'accuracy', 'balanced_accuracy', 'roc_auc', 'f1'
            Values: floats in [0, 1]. Missing keys are plotted as 0.
        dataset_name : str
        height : int, optional

        Returns
        -------
        go.Figure
        """
        cats = ["Accuracy", "Bal Acc", "ROC AUC", "F1"]
        vals = [
            metrics.get(key, 0)
            for key in ("accuracy", "balanced_accuracy", "roc_auc", "f1")
        ]
        # Close the polygon by repeating the first point.
        cats = cats + [cats[0]]
        vals = vals + [vals[0]]

        fig = go.Figure(
            go.Scatterpolar(
                r=vals,
                theta=cats,
                fill="toself",
                fillcolor="rgba(88,166,255,.12)",
                line=dict(color=_T["a1"], width=2),
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            # Plotly text breaks lines on <br>, not on "\n".
            title=_title(f"{dataset_name}<br>Performance"),
            height=height or 300,
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1],
                    gridcolor=_T["grid"],
                    tickfont=dict(color=_T["muted"]),
                ),
                angularaxis=dict(gridcolor=_T["grid"], tickfont=dict(color=_T["text"])),
                bgcolor=_T["panel"],
            ),
        )
        return fig

    # ------------------------------------------------------------------
    # 11. 2-D HUG profile heatmap
    # ------------------------------------------------------------------

    def plot_2d_profile(
        self,
        feature_a: str,
        feature_b: str,
        height: int | None = None,
        title: str | None = None,
    ) -> go.Figure:
        """2-D HUG profile heatmap for compound patterns involving two features.

        Each cell holds the summed utility of the patterns that contain the
        corresponding bin of *feature_a* (rows) and *feature_b* (columns).
        Numeric bins are ordered by lower bound; other labels follow
        alphabetically.

        Parameters
        ----------
        feature_a, feature_b : str
        height : int, optional
        title : str, optional

        Returns
        -------
        go.Figure
        """
        clf = self._clf
        heading = title or f"2-D HUG Profile · {feature_a} × {feature_b}"

        def _bin(label: str, feat: str) -> str:
            """Return the bin text of *feat* inside a pattern label."""
            prefix = feat + "="
            for part in label.split(", "):
                if part.startswith(prefix):
                    return part[len(prefix) :]
            return "?"

        # Whole-part matching avoids treating "age" as part of "mortgage_age".
        compound = [
            (lbl, pe)
            for lbl, pe in zip(clf.get_hug_features(), clf.patterns_)
            if self._has_feature(lbl, feature_a) and self._has_feature(lbl, feature_b)
        ]

        if not compound:
            return self._empty_figure(
                heading,
                # Plotly text breaks lines on <br>, not on "\n".
                f"No compound patterns for '{feature_a}' × '{feature_b}'.<br>Try L≥2 and lower G.",
                height,
            )

        # Sort by numeric lower bound; a plain string sort would put "[10," before "[2,".
        bins_a = sorted({_bin(lbl, feature_a) for lbl, _ in compound}, key=self._bin_sort_key)
        bins_b = sorted({_bin(lbl, feature_b) for lbl, _ in compound}, key=self._bin_sort_key)
        idx_a = {b: i for i, b in enumerate(bins_a)}
        idx_b = {b: i for i, b in enumerate(bins_b)}

        grid = np.zeros((len(bins_a), len(bins_b)))
        for lbl, pe in compound:
            grid[idx_a[_bin(lbl, feature_a)], idx_b[_bin(lbl, feature_b)]] += float(pe.utility)

        fig = go.Figure()
        fig.add_trace(
            go.Heatmap(
                z=grid,
                x=bins_b,
                y=bins_a,
                colorscale="RdBu_r",
                zmid=0,
                colorbar=dict(
                    tickfont=dict(color=_T["muted"]),
                    title=dict(font=dict(color=_T["muted"]), text="aggregate utility"),
                ),
            )
        )
        fig.update_layout(
            **_LAYOUT_BASE,
            title=_title(heading),
            height=height or self._h,
        )
        fig.update_xaxes(
            **self._axis_style(
                title_text=feature_b,
                tickangle=-35,
                tickfont=dict(size=9, color=_T["muted"]),
            )
        )
        fig.update_yaxes(
            **self._axis_style(
                title_text=feature_a,
                tickfont=dict(size=9, color=_T["muted"]),
            )
        )
        return fig

    # ------------------------------------------------------------------
    # 12. Full HTML dashboard
    # ------------------------------------------------------------------

    def plot_dashboard(
        self,
        X: Any,
        dataset_name: str = "Dataset",
        feature_names_for_profile: list[str] | None = None,
        output_path: str | None = None,
    ) -> str:
        """Generate a self-contained multi-panel HTML dashboard.

        Produces feature importance, top patterns, utility-vs-IG, feature
        coverage, pattern lengths and support distribution panels, followed by
        a marginal bin profile and a feature-combinations chart for each
        profiled feature. Panels are laid out two per row.

        Parameters
        ----------
        X : array-like or DataFrame
            Currently unused; accepted for API compatibility.
        dataset_name : str
            Shown in the page title and heading (HTML-escaped).
        feature_names_for_profile : list of str, optional
            Which features to include marginal bin profiles for.
            Defaults to the first 12 features (alphabetically) that have
            singleton patterns.
        output_path : str, optional
            If given, writes the HTML to this path (UTF-8).

        Returns
        -------
        str (HTML string)
        """
        from html import escape

        import plotly.io as pio

        clf = self._clf

        if feature_names_for_profile is None:
            # Features that have at least one singleton pattern.
            singleton_feats = sorted(
                {
                    lbl.split(", ")[0].split("=")[0]
                    for lbl, pe in zip(clf.get_hug_features(), clf.patterns_)
                    if len(pe.items) == 1
                }
            )
            feature_names_for_profile = singleton_feats[:12]  # limit for dashboard

        # Build all figures: global panels first, then two per profiled feature.
        figs = [
            self.plot_feature_importance(top_n=10),
            self.plot_top_patterns(top_n=15),
            self.plot_utility_vs_ig(),
            self.plot_feature_coverage(top_n=10),
            self.plot_pattern_lengths(),
            self.plot_support_distribution(),
        ]
        for fname in feature_names_for_profile:
            figs.append(self.plot_marginal_bin_profile(fname))
            figs.append(self.plot_feature_combinations(fname, top_n=15))

        # The name lands inside HTML markup, so escape &, <, > and quotes.
        safe_name = escape(str(dataset_name))

        html_parts = [
            "<!DOCTYPE html><html><head>",
            "<meta charset='utf-8'>",
            f"<title>HUG-IML Dashboard — {safe_name}</title>",
            "<style>",
            f"body{{background:{_T['bg']};color:{_T['text']};font-family:{_T['font_b']};margin:0;padding:16px;}}",
            f"h1{{font-family:{_T['font_t']};color:{_T['a1']};}}",
            # Plain string: single braces, otherwise the rule is invalid CSS.
            ".grid-2{display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:14px;}",
            f".card{{background:{_T['panel']};border:1px solid {_T['border']};border-radius:8px;overflow:hidden;}}",
            "</style></head><body>",
            f"<h1>HUG-IML Governance Dashboard — {safe_name}</h1>",
            "<div class='grid-2'>",
        ]

        # Assemble HTML — use plotly's own bundled JS so the file is
        # self-contained and works without a network connection (no CDN pin to
        # go stale). The library is embedded once, with the first figure.
        for i, fig in enumerate(figs):
            fig_html = pio.to_html(fig, full_html=False, include_plotlyjs=(i == 0))
            html_parts.append(f"<div class='card'>{fig_html}</div>")
            # Start a new two-column row after every second card.
            if (i + 1) % 2 == 0 and i + 1 < len(figs):
                html_parts.append("</div><div class='grid-2'>")

        html_parts.append("</div></body></html>")
        document = "\n".join(html_parts)

        if output_path:
            with open(output_path, "w", encoding="utf-8") as fh:
                fh.write(document)

        return document

    # ------------------------------------------------------------------

    def _validate_fitted(self) -> None:
        """Raise RuntimeError unless the wrapped classifier has ``patterns_``."""
        if not hasattr(self._clf, "patterns_"):
            raise RuntimeError("Classifier must be fitted before creating a HUGPlotter.")