Source code for lingam.utils._mggd

import numpy as np
from scipy.stats import gamma
from scipy.special import digamma
from scipy.optimize import root_scalar



[docs]
class MGGD:
    """Multivariate Generalized Gaussian Distribution (MGGD)"""


[docs]
    def __init__(self, mean, cov, beta=1, tolerance=1e-6):
        """Initializes the MGGD with given parameters.

        Parameters
        ----------
        mean : array-like, shape (n_features,)
            Mean vector of the distribution.
        cov : array-like, shape (n_features, n_features)
            Covariance matrix of the distribution.
        beta : float, optional (default=1)
            Shape parameter of the distribution.
        tolerance : float, optional (default=1e-6)
            Tolerance for convergence in parameter estimation.
        """
        self.mean = np.array(mean)
        self.cov = np.array(cov)
        self.beta = beta
        self.tolerance = tolerance

        n_features = self.mean.shape[0]

        if self.cov.shape[0] != n_features or self.cov.shape[1] != n_features:
            raise ValueError("cov must be a square matrix matching the length of mean.")
        if not np.allclose(self.cov, self.cov.T):
            raise ValueError("cov must be a symmetric matrix.")
        if np.any(np.linalg.eigvals(self.cov) < self.tolerance * np.max(np.abs(np.linalg.eigvals(self.cov)))):
            raise ValueError("cov must be a symmetric positive definite matrix.")
        if self.beta <= 0:
            raise ValueError("beta must be a positive value.")



[docs]
    def rvs(self, size):
        """Generates random samples from the MGGD.

        Parameters
        ----------
        size : int
            Number of samples to generate.

        Returns
        -------
        samples : array-like, shape (size, n_features)
            Generated samples.
        """
        p = self.mean.shape[0]
        eigvals, eigvecs = np.linalg.eigh(self.cov)
        A = eigvecs @ np.diag(np.sqrt(eigvals)) @ eigvecs.T

        r = gamma.rvs(a=p / (2 * self.beta), scale=2, size=size) ** (
            1 / (2 * self.beta)
        )
        U = np.random.multivariate_normal(np.zeros(p), np.eye(p), size)
        U_norm = np.linalg.norm(U, axis=1, keepdims=True)
        U = U / U_norm

        samples = (r[:, np.newaxis] * (U @ A.T)) + self.mean
        return samples





[docs]
class MGGDEstimator:
    """Estimator for Multivariate Generalized Gaussian Distribution (MGGD) parameters"""


[docs]
    def __init__(self):
        pass



[docs]
    def fit(self, X, eps=1e-6):
        """Estimates the parameters of the MGGD from data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data.
        eps : float, optional (default=1e-6)
            Small value to avoid division by zero.

        Returns
        -------
        mggd : MGGD
            Estimated MGGD object.
        """
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n, p = X.shape
        mean = np.mean(X, axis=0)
        X_centered = X - mean

        cov = np.eye(p)
        beta = 0.1
        beta_1 = np.inf

        k = 0
        while np.abs(beta - beta_1) > eps and k < 1000:
            k += 1
            cov_k = np.zeros((p, p))
            inv_cov = np.dot(X_centered, np.linalg.inv(cov))
            u = np.sum(inv_cov * X_centered, axis=1)

            for i in range(n):
                cov_k += u[i] ** (beta - 1) * np.outer(X_centered[i], X_centered[i])

            cov = cov_k / n
            cov = (p / np.sum(np.diag(cov))) * cov

            beta_1 = beta
            beta = self._estimate_beta(u, beta, p, eps)

        m = (beta / (p * n) * np.sum(u**beta)) ** (1 / beta)
        cov *= m

        return MGGD(mean=mean, cov=cov, beta=beta, tolerance=eps)


    def _estimate_beta(self, u, beta, p, eps):
        """Estimates the shape parameter beta using root finding."""
        N = len(u)

        def equation(z):
            term1 = p * N / (2 * np.sum(u**z)) * np.sum(np.log(u + eps) * u**z)
            term2 = p * N / (2 * z) * (np.log(2) + digamma(p / (2 * z)))
            term3 = N
            term4 = p * N / (2 * z) * np.log(z / (p * N) * np.sum(u**z + eps))
            return term1 - term2 - term3 - term4

        bracket = [eps, 2 * np.ceil(beta)]
        f_a, f_b = equation(bracket[0]), equation(bracket[1])

        while f_a * f_b > 0:
            if f_a > 0:
                bracket[0] /= 2
            else:
                bracket[1] *= 2
            f_a, f_b = equation(bracket[0]), equation(bracket[1])

        result = root_scalar(equation, bracket=bracket, method="brentq")
        return result.root