Source code for hugiml.metrics

# Copyright 2026 Srikumar Krishnamoorthy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Interpretability-complexity metrics for a fitted HUGIMLClassifierNative.

All functions accept a fitted ``HUGIMLClassifierNative`` and (optionally) a
data matrix ``X`` to compute sample-level statistics.  They never re-train
the model.

Quick reference
---------------
Example::

    from hugiml.metrics import compute_all_metrics
    m = compute_all_metrics(clf, X_test)
    print(m)

Available metrics
-----------------

* ``n_patterns`` — total mined patterns.
* ``avg_pattern_length`` — mean number of items per pattern.
* ``coverage`` — fraction of samples matched by at least one pattern.
* ``overlap_rate`` — mean number of patterns active per sample, normalised by ``n_patterns``.
* ``top_k_cumulative_contribution(k)`` — cumulative absolute-coefficient share of top-k patterns.
* ``active_patterns_per_prediction`` — per-sample array of active-pattern counts.
* ``explanation_sparsity`` — fraction of patterns never active on the supplied data.
"""

from __future__ import annotations

import dataclasses
from typing import Any

import numpy as np
import pandas as pd

__all__ = [
    "InterpretabilityMetrics",
    "compute_all_metrics",
    "metrics_dataframe",
]


# ---------------------------------------------------------------------------
# Data container
# ---------------------------------------------------------------------------


@dataclasses.dataclass
class InterpretabilityMetrics:
    """Container bundling every interpretability metric for one model/dataset pair.

    Attributes
    ----------
    n_patterns : int
        How many HUG patterns were mined.
    avg_pattern_length : float
        Average count of items (conditions) in a pattern.
    max_pattern_length : int
        Item count of the longest pattern.
    coverage : float
        Share of samples for which at least one pattern is active.
    mean_active_patterns : float
        Mean count of active patterns per sample.
    std_active_patterns : float
        Standard deviation of the per-sample active-pattern count.
    overlap_rate : float
        ``mean_active_patterns`` divided by ``n_patterns`` (normalised).
    explanation_sparsity : float
        Share of patterns that never activate on *X* ("dead" patterns).
    top_k_cumulative_contribution : dict[int, float]
        For each k, the cumulative share of total absolute coefficient
        magnitude carried by the k strongest patterns.  When built by
        ``compute_all_metrics`` the keys are 1, 5, 10, 20 and 50.
    n_samples : int
        Row count of the X used for the sample-level metrics.
    """

    n_patterns: int = 0
    avg_pattern_length: float = 0.0
    max_pattern_length: int = 0
    coverage: float = 0.0
    mean_active_patterns: float = 0.0
    std_active_patterns: float = 0.0
    overlap_rate: float = 0.0
    explanation_sparsity: float = 0.0
    top_k_cumulative_contribution: dict = dataclasses.field(default_factory=dict)
    n_samples: int = 0

    def __str__(self) -> str:  # pragma: no cover
        # Fixed header and scalar metrics first ...
        report = [
            "InterpretabilityMetrics",
            "=" * 42,
            f" n_patterns : {self.n_patterns}",
            f" avg_pattern_length : {self.avg_pattern_length:.2f}",
            f" max_pattern_length : {self.max_pattern_length}",
            f" coverage : {self.coverage:.4f} ({self.coverage * 100:.1f}% of {self.n_samples} samples)",
            f" mean_active_patterns : {self.mean_active_patterns:.2f}",
            f" std_active_patterns : {self.std_active_patterns:.2f}",
            f" overlap_rate (norm.) : {self.overlap_rate:.4f}",
            f" explanation_sparsity : {self.explanation_sparsity:.4f}",
            " top-k cumulative |coef|:",
        ]
        # ... then one row per k, in ascending order of k.
        ranked = sorted(self.top_k_cumulative_contribution.items())
        report.extend(f" top-{k:>3} : {share * 100:6.1f}%" for k, share in ranked)
        return "\n".join(report)

    def to_dict(self) -> dict:
        """Flatten the metrics into a single-level dict (one DataFrame row).

        The nested ``top_k_cumulative_contribution`` mapping is removed and
        each of its entries is re-added as a ``top_<k>_cumcontrib`` key.
        """
        flat = dataclasses.asdict(self)
        shares = flat.pop("top_k_cumulative_contribution", {})
        flat.update({f"top_{k}_cumcontrib": share for k, share in shares.items()})
        return flat
# ---------------------------------------------------------------------------
# Individual metric functions
# ---------------------------------------------------------------------------


def n_patterns(clf: Any) -> int:
    """Return the total number of mined HUG patterns.

    Raises
    ------
    RuntimeError
        If *clf* has no ``patterns_`` attribute (i.e. is not fitted).
    """
    _require_fitted(clf)
    return len(clf.patterns_)


def avg_pattern_length(clf: Any) -> float:
    """Return the mean number of items (conditions) per pattern.

    Returns 0.0 when the model holds no patterns.
    """
    _require_fitted(clf)
    if not clf.patterns_:
        return 0.0
    return float(np.mean([len(pe.items) for pe in clf.patterns_]))


def max_pattern_length(clf: Any) -> int:
    """Return the length of the longest mined pattern (0 if there are none)."""
    _require_fitted(clf)
    if not clf.patterns_:
        return 0
    return int(max(len(pe.items) for pe in clf.patterns_))


def coverage(clf: Any, X: Any) -> float:
    """Fraction of samples in X matched by at least one active pattern.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    float in [0, 1]
        0.0 when X contains no samples.
    """
    hup = _transform(clf, X)
    # row sums: how many patterns are active per sample
    row_sums = np.asarray(hup.sum(axis=1)).ravel()
    if row_sums.size == 0:
        # The mean of an empty array is NaN (plus a RuntimeWarning); report
        # "nothing covered" instead so the result stays inside [0, 1].
        return 0.0
    return float((row_sums > 0).mean())


def active_patterns_per_prediction(clf: Any, X: Any) -> np.ndarray:
    """Return a 1-D array with the number of active patterns for each sample.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    np.ndarray of int, shape (n_samples,)
    """
    hup = _transform(clf, X)
    return np.asarray(hup.sum(axis=1)).ravel().astype(int)


def overlap_rate(clf: Any, X: Any) -> float:
    """Normalised overlap: mean active patterns / n_patterns.

    An overlap_rate of 0 means every sample activates exactly 0 patterns.
    A rate of 1 means every sample activates all patterns.

    Returns 0.0 when the model has no patterns or X contains no samples.
    """
    _require_fitted(clf)
    n_pats = len(clf.patterns_)
    if n_pats == 0:
        return 0.0
    ap = active_patterns_per_prediction(clf, X)
    if ap.size == 0:
        # Avoid NaN from averaging an empty array.
        return 0.0
    return float(ap.mean()) / n_pats


def explanation_sparsity(clf: Any, X: Any) -> float:
    """Fraction of patterns that are never active on X ("dead" patterns).

    A sparsity of 1.0 means no pattern fires on any sample (degenerate model).
    A sparsity of 0.0 means every pattern fires on at least one sample.
    A model without any patterns is reported as 1.0.
    """
    hup = _transform(clf, X)
    # column sums: how many samples activate each pattern
    col_sums = np.asarray(hup.sum(axis=0)).ravel()
    n_pats = len(clf.patterns_)
    if n_pats == 0:
        return 1.0
    dead = int((col_sums == 0).sum())
    return float(dead) / n_pats


def top_k_cumulative_contribution(clf: Any, ks: list[int] | None = None) -> dict[int, float]:
    """Cumulative share of total |coef| held by the top-k patterns.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    ks : list of int
        Values of k to evaluate.  Default: [1, 5, 10, 20, 50].  A k larger
        than the number of patterns is treated as "all patterns"; a k that is
        zero or negative yields a share of 0.0.

    Returns
    -------
    dict mapping k → cumulative fraction (float in [0, 1])
        Every share is 0.0 when the total magnitude is zero.
    """
    _require_fitted(clf)
    if ks is None:
        ks = [1, 5, 10, 20, 50]

    # Retrieve coefficients
    try:
        imp = clf.feature_importances()
        abs_coefs = imp["abs_coefficient"].values.astype(float)
    except Exception:
        # Deliberately broad best-effort: whatever goes wrong while reading
        # the importances, fall back to pattern utility as a proxy.
        abs_coefs = np.array([pe.utility for pe in clf.patterns_], dtype=float)

    # Work on magnitudes so every share stays within the documented [0, 1]
    # range even if the proxy values carry a sign.
    abs_coefs = np.abs(abs_coefs)

    total = float(abs_coefs.sum())
    if total == 0:
        return {k: 0.0 for k in ks}

    sorted_coefs = np.sort(abs_coefs)[::-1]
    result: dict[int, float] = {}
    for k in ks:
        # Clamp to [0, n]: a negative k would otherwise be read as a
        # "drop the last |k| entries" slice and return a bogus share.
        actual_k = max(0, min(k, len(sorted_coefs)))
        result[k] = float(sorted_coefs[:actual_k].sum() / total)
    return result


# ---------------------------------------------------------------------------
# Composite helper
# ---------------------------------------------------------------------------
def compute_all_metrics(clf: Any, X: Any) -> InterpretabilityMetrics:
    """Compute all interpretability metrics in a single call.

    The pattern matrix for X is computed once and every sample-level metric
    is derived from its row and column sums.

    Parameters
    ----------
    clf : fitted HUGIMLClassifierNative
    X : array-like or DataFrame

    Returns
    -------
    InterpretabilityMetrics
        Float metrics are rounded (3 or 4 decimals).  When X contains no
        samples, ``coverage``, ``mean_active_patterns``,
        ``std_active_patterns`` and ``overlap_rate`` are reported as 0.0.

    Raises
    ------
    RuntimeError
        If *clf* has not been fitted.
    """
    _require_fitted(clf)
    hup = _transform(clf, X)
    n_samp = hup.shape[0]
    n_pats = len(clf.patterns_)

    # Fall back to a single zero so mean/max are defined without patterns.
    lengths = [len(pe.items) for pe in clf.patterns_] or [0]
    avg_len = float(np.mean(lengths))
    max_len = int(max(lengths))

    # Row sums: active patterns per sample; column sums: samples per pattern.
    row_sums = np.asarray(hup.sum(axis=1)).ravel()
    col_sums = np.asarray(hup.sum(axis=0)).ravel()

    if row_sums.size > 0:
        cov = float((row_sums > 0).mean())
        mean_ap = float(row_sums.mean())
        std_ap = float(row_sums.std())
    else:
        # mean()/std() of an empty array yield NaN plus a RuntimeWarning;
        # report zeros for an empty X instead.
        cov = mean_ap = std_ap = 0.0

    ol = float(mean_ap / n_pats) if n_pats > 0 else 0.0

    dead = int((col_sums == 0).sum())
    sparsity = float(dead / n_pats) if n_pats > 0 else 1.0

    topk = top_k_cumulative_contribution(clf)

    return InterpretabilityMetrics(
        n_patterns=n_pats,
        avg_pattern_length=round(avg_len, 3),
        max_pattern_length=max_len,
        coverage=round(cov, 4),
        mean_active_patterns=round(mean_ap, 3),
        std_active_patterns=round(std_ap, 3),
        overlap_rate=round(ol, 4),
        explanation_sparsity=round(sparsity, 4),
        top_k_cumulative_contribution=topk,
        n_samples=n_samp,
    )
def metrics_dataframe(results: dict[str, InterpretabilityMetrics]) -> pd.DataFrame:
    """Build a comparison table from ``{model_name: InterpretabilityMetrics}``.

    Handy for viewing several models or configurations side by side.

    Parameters
    ----------
    results : dict
        Model labels mapped to their InterpretabilityMetrics instances.

    Returns
    -------
    pd.DataFrame
        One row per model label, one column per flattened metric key.
    """
    # Flatten each metrics object; labels become columns at first ...
    flattened = {label: metrics.to_dict() for label, metrics in results.items()}
    by_metric = pd.DataFrame(flattened)
    # ... so transpose to end up with one row per model.
    return by_metric.transpose()
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _require_fitted(clf: Any) -> None:
    """Raise ``RuntimeError`` unless *clf* exposes a ``patterns_`` attribute."""
    if hasattr(clf, "patterns_"):
        return
    raise RuntimeError("Classifier must be fitted before computing interpretability metrics.")


def _transform(clf: Any, X: Any):
    """Verify *clf* is fitted, then return ``clf.transform(X)``.

    The result is the sparse binary HUG pattern matrix for X.
    """
    _require_fitted(clf)
    pattern_matrix = clf.transform(X)
    return pattern_matrix