# Copyright 2026 Srikumar Krishnamoorthy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Interpretability-complexity metrics for a fitted HUGIMLClassifierNative.
All functions accept a fitted ``HUGIMLClassifierNative`` and (optionally) a
data matrix ``X`` to compute sample-level statistics. They never re-train
the model.
Quick reference
---------------
Example::

    from hugiml.metrics import compute_all_metrics

    m = compute_all_metrics(clf, X_test)
    print(m)

Available metrics
-----------------
* ``n_patterns`` — total mined patterns.
* ``avg_pattern_length`` — mean number of items per pattern.
* ``coverage`` — fraction of samples matched by at least one pattern.
* ``overlap_rate`` — mean number of patterns active per sample, divided by the total number of patterns.
* ``top_k_cumulative_contribution(k)`` — cumulative absolute-coefficient share of top-k patterns.
* ``active_patterns_per_prediction`` — per-sample array.
* ``explanation_sparsity`` — fraction of patterns never active on the supplied data.
"""
from __future__ import annotations
import dataclasses
from typing import Any
import numpy as np
import pandas as pd
# Public API exported by a star-import. The individual metric functions in
# this module (e.g. ``coverage``) are not listed here, so they must be
# imported by name.
__all__ = [
    "InterpretabilityMetrics",
    "compute_all_metrics",
    "metrics_dataframe",
]
# ---------------------------------------------------------------------------
# Data container
# ---------------------------------------------------------------------------
@dataclasses.dataclass
class InterpretabilityMetrics:
    """All interpretability metrics for one fitted model + dataset.

    Attributes
    ----------
    n_patterns : int
        Total number of mined HUG patterns.
    avg_pattern_length : float
        Mean items (conditions) per pattern.
    max_pattern_length : int
        Length of the longest pattern.
    coverage : float
        Fraction of samples covered by at least one active pattern.
    mean_active_patterns : float
        Average number of patterns active per sample.
    std_active_patterns : float
        Standard deviation of active patterns per sample.
    overlap_rate : float
        Normalised overlap: mean_active_patterns / n_patterns.
    explanation_sparsity : float
        Fraction of patterns that are never active on *X* ("dead" patterns).
    top_k_cumulative_contribution : dict[int, float]
        Mapping from k to cumulative share of total absolute coefficient
        magnitude for the top-k patterns. Default keys: [1, 5, 10, 20, 50].
    n_samples : int
        Number of rows in X used for sample-level metrics.
    """

    n_patterns: int = 0
    avg_pattern_length: float = 0.0
    max_pattern_length: int = 0
    coverage: float = 0.0
    mean_active_patterns: float = 0.0
    std_active_patterns: float = 0.0
    overlap_rate: float = 0.0
    explanation_sparsity: float = 0.0
    # default_factory gives every instance its own dict (no shared mutable default).
    top_k_cumulative_contribution: dict[int, float] = dataclasses.field(default_factory=dict)
    n_samples: int = 0

    def __str__(self) -> str:  # pragma: no cover
        """Return a multi-line, human-readable report of all metrics."""
        lines = [
            "InterpretabilityMetrics",
            "=" * 42,
            f" n_patterns : {self.n_patterns}",
            f" avg_pattern_length : {self.avg_pattern_length:.2f}",
            f" max_pattern_length : {self.max_pattern_length}",
            f" coverage : {self.coverage:.4f} ({self.coverage * 100:.1f}% of {self.n_samples} samples)",
            f" mean_active_patterns : {self.mean_active_patterns:.2f}",
            f" std_active_patterns : {self.std_active_patterns:.2f}",
            f" overlap_rate (norm.) : {self.overlap_rate:.4f}",
            f" explanation_sparsity : {self.explanation_sparsity:.4f}",
            " top-k cumulative |coef|:",
        ]
        # Sorted so the shares are listed by increasing k regardless of
        # the dict's insertion order.
        for k, v in sorted(self.top_k_cumulative_contribution.items()):
            lines.append(f" top-{k:>3} : {v * 100:6.1f}%")
        return "\n".join(lines)

    def to_dict(self) -> dict[str, Any]:
        """Return a flat dict suitable for DataFrame construction.

        The nested ``top_k_cumulative_contribution`` mapping is replaced by
        one ``top_<k>_cumcontrib`` entry per k. ``dataclasses.asdict`` copies
        the field values, so the instance itself is left untouched.
        """
        flat = dataclasses.asdict(self)
        shares = flat.pop("top_k_cumulative_contribution", {})
        for k, share in shares.items():
            flat[f"top_{k}_cumcontrib"] = share
        return flat
# ---------------------------------------------------------------------------
# Individual metric functions
# ---------------------------------------------------------------------------
def n_patterns(clf: Any) -> int:
    """Count the HUG patterns stored on a fitted classifier.

    Raises ``RuntimeError`` (through ``_require_fitted``) when ``clf`` has no
    ``patterns_`` attribute.
    """
    _require_fitted(clf)
    mined = clf.patterns_
    return len(mined)
def avg_pattern_length(clf: Any) -> float:
    """Average count of items (conditions) across the mined patterns.

    Yields ``0.0`` when the classifier holds no patterns.
    """
    _require_fitted(clf)
    if clf.patterns_:
        sizes = [len(entry.items) for entry in clf.patterns_]
        return float(np.mean(sizes))
    return 0.0
def max_pattern_length(clf: Any) -> int:
    """Item count of the longest mined pattern (``0`` when there are none)."""
    _require_fitted(clf)
    longest = 0
    # Lengths are never negative, so starting from 0 also covers the
    # no-pattern case.
    for entry in clf.patterns_:
        longest = max(longest, len(entry.items))
    return int(longest)
def coverage(clf: Any, X: Any) -> float:
    """Fraction of samples in X matched by at least one active pattern.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    float in [0, 1]
        ``0.0`` when X contains no samples.
    """
    hup = _transform(clf, X)
    # Row sums: how many patterns are active per sample.
    row_sums = np.asarray(hup.sum(axis=1)).ravel()
    if row_sums.size == 0:
        # The mean of an empty array is NaN (with a RuntimeWarning); no
        # samples means nothing is covered.
        return 0.0
    return float((row_sums > 0).mean())
def active_patterns_per_prediction(clf: Any, X: Any) -> np.ndarray:
    """Count, for every sample in X, how many patterns are active.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    np.ndarray of int, shape (n_samples,)
    """
    pattern_matrix = _transform(clf, X)
    per_row = pattern_matrix.sum(axis=1)
    # asarray + ravel flattens whatever the row-wise sum returns into 1-D.
    counts = np.asarray(per_row).ravel()
    return counts.astype(int)
def overlap_rate(clf: Any, X: Any) -> float:
    """Normalised overlap: mean active patterns / n_patterns.

    An overlap_rate of 0 means every sample activates exactly 0 patterns.
    A rate of 1 means every sample activates all patterns.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    float
        ``0.0`` when the model has no patterns or X contains no samples.
    """
    _require_fitted(clf)
    n_pats = len(clf.patterns_)
    if n_pats == 0:
        return 0.0
    ap = active_patterns_per_prediction(clf, X)
    if ap.size == 0:
        # The mean of an empty array is NaN; report "no overlap" instead.
        return 0.0
    return float(ap.mean()) / n_pats
def explanation_sparsity(clf: Any, X: Any) -> float:
    """Share of patterns that fire on no sample of X ("dead" patterns).

    ``1.0`` means no pattern is active on any sample (also returned when the
    model has no patterns at all); ``0.0`` means every pattern is active on
    at least one sample.
    """
    pattern_matrix = _transform(clf, X)
    # Column sums: number of samples activating each pattern.
    activations = np.asarray(pattern_matrix.sum(axis=0)).ravel()
    total = len(clf.patterns_)
    if total == 0:
        return 1.0
    never_fired = int(np.count_nonzero(activations == 0))
    return float(never_fired) / total
def top_k_cumulative_contribution(clf: Any, ks: list[int] | None = None) -> dict[int, float]:
    """Cumulative share of total |coef| held by the top-k patterns.

    Coefficient magnitudes are read from the ``"abs_coefficient"`` column of
    ``clf.feature_importances()``. If that lookup fails for any reason, the
    magnitude of each pattern's ``utility`` is used as a proxy instead.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    ks : list of int
        Values of k to evaluate. Default: [1, 5, 10, 20, 50]. A k larger than
        the number of patterns is treated as "all patterns"; a k <= 0 yields
        a share of 0.0.

    Returns
    -------
    dict mapping k → cumulative fraction (float in [0, 1])
        Every share is 0.0 when the total magnitude is zero.
    """
    _require_fitted(clf)
    if ks is None:
        ks = [1, 5, 10, 20, 50]

    # Retrieve coefficients.
    try:
        imp = clf.feature_importances()
        raw = imp["abs_coefficient"].values.astype(float)
    except Exception:
        # Deliberate best-effort fallback: use utility as a proxy when the
        # coefficient table is unavailable.
        raw = np.array([pe.utility for pe in clf.patterns_], dtype=float)

    # Shares are defined on magnitudes; abs() is a no-op for values that are
    # already absolute and protects the fallback against signed utilities.
    abs_coefs = np.abs(raw)

    total = float(abs_coefs.sum())
    if total == 0:
        return {k: 0.0 for k in ks}

    sorted_coefs = np.sort(abs_coefs)[::-1]
    result: dict[int, float] = {}
    for k in ks:
        # Clamp to [0, n]: a negative k would otherwise slice "[:-1]" and
        # report almost the whole mass.
        actual_k = max(0, min(k, len(sorted_coefs)))
        result[k] = float(sorted_coefs[:actual_k].sum() / total)
    return result
# ---------------------------------------------------------------------------
# Composite helper
# ---------------------------------------------------------------------------
def compute_all_metrics(clf: Any, X: Any) -> InterpretabilityMetrics:
    """Compute all interpretability metrics in a single call.

    X is transformed once and every sample-level statistic is derived from
    that single pattern matrix.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    InterpretabilityMetrics
        Float fields are rounded (3 decimals for pattern-length and
        active-pattern statistics, 4 for coverage, overlap rate and
        sparsity). Sample-level statistics are 0.0 when X has no rows.

    Raises
    ------
    RuntimeError
        If ``clf`` has not been fitted.
    """
    _require_fitted(clf)
    hup = _transform(clf, X)
    n_samp = hup.shape[0]
    n_pats = len(clf.patterns_)

    # Pattern-level statistics; a model without patterns reports zeros.
    lengths = [len(pe.items) for pe in clf.patterns_]
    avg_len = float(np.mean(lengths)) if lengths else 0.0
    max_len = int(max(lengths)) if lengths else 0

    # Row sums: active patterns per sample. Column sums: samples per pattern.
    row_sums = np.asarray(hup.sum(axis=1)).ravel()
    col_sums = np.asarray(hup.sum(axis=0)).ravel()

    if n_samp > 0:
        cov = float((row_sums > 0).mean())
        mean_ap = float(row_sums.mean())
        std_ap = float(row_sums.std())
    else:
        # mean()/std() of an empty array are NaN (with a RuntimeWarning);
        # report neutral zeros for an empty X instead.
        cov = mean_ap = std_ap = 0.0

    ol = float(mean_ap / n_pats) if n_pats > 0 else 0.0
    dead = int((col_sums == 0).sum())
    sparsity = float(dead / n_pats) if n_pats > 0 else 1.0
    topk = top_k_cumulative_contribution(clf)

    return InterpretabilityMetrics(
        n_patterns=n_pats,
        avg_pattern_length=round(avg_len, 3),
        max_pattern_length=max_len,
        coverage=round(cov, 4),
        mean_active_patterns=round(mean_ap, 3),
        std_active_patterns=round(std_ap, 3),
        overlap_rate=round(ol, 4),
        explanation_sparsity=round(sparsity, 4),
        top_k_cumulative_contribution=topk,
        n_samples=n_samp,
    )
def metrics_dataframe(results: dict[str, InterpretabilityMetrics]) -> pd.DataFrame:
    """Convert a mapping of {model_name: InterpretabilityMetrics} to a DataFrame.

    Useful for side-by-side comparisons across models or configurations.

    Parameters
    ----------
    results : dict
        Keys are model labels; values are InterpretabilityMetrics instances.

    Returns
    -------
    pd.DataFrame
        One row per model label and one column per entry of ``to_dict()``.
    """
    rows = {name: m.to_dict() for name, m in results.items()}
    # orient="index" builds one column per metric, so each metric keeps its
    # own dtype. Building model-wise columns and transposing would upcast
    # integer metrics such as n_patterns to float.
    return pd.DataFrame.from_dict(rows, orient="index")
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _require_fitted(clf: Any) -> None:
    """Raise ``RuntimeError`` unless ``clf`` exposes a ``patterns_`` attribute."""
    if hasattr(clf, "patterns_"):
        return
    raise RuntimeError("Classifier must be fitted before computing interpretability metrics.")
def _transform(clf: Any, X: Any):
    """Check that ``clf`` is fitted, then return ``clf.transform(X)``.

    The result is the sparse binary HUG pattern matrix for X.
    """
    _require_fitted(clf)
    pattern_matrix = clf.transform(X)
    return pattern_matrix