Source code for MEDiml.biomarkers.glcm

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


from copy import deepcopy
from typing import Dict, List, Union, List

import numpy as np
import pandas as pd

from ..utils.textureTools import (coord2index, get_neighbour_direction,
                                  get_value, is_list_all_none)



[docs]
def get_matrix(roi_only: np.ndarray,
                    levels: Union[np.ndarray, List],
                    dist_correction=True) -> np.ndarray:
    r"""
    This function computes the Gray-Level Co-occurence Matrix (GLCM) of the
    region of interest (ROI) of an input volume. The input volume is assumed
    to be isotropically resampled. Only one GLCM is computed per scan,
    simultaneously recording (i.e. adding up) the neighboring properties of
    the 26-connected neighbors of all voxels in the ROI. To account for
    discretization length differences, neighbors at a distance of :math:`\sqrt{3}`
    voxels around a center voxel increment the GLCM by a value of :math:`\sqrt{3}`,
    neighbors at a distance of :math:`\sqrt{2}` voxels around a center voxel increment
    the GLCM by a value of :math:`\sqrt{2}`, and neighbors at a distance of 1 voxels
    around a center voxel increment the GLCM by a value of 1.
    This matrix refers to "Grey level co-occurrence based features" (ID = LFYI) 
    in the `IBSI1 reference manual <https://arxiv.org/pdf/1612.07003.pdf>`_.

    Args:
        roi_only (ndarray): Smallest box containing the ROI, with the imaging data
                            ready for texture analysis computations. Voxels outside the ROI are
                            set to NaNs.
        levels (ndarray or List): Vector containing the quantized gray-levels in the tumor region
                                  (or reconstruction ``levels`` of quantization).
        dist_correction (bool, optional): Set this variable to true in order to use
                                discretization length difference corrections as used by the `Institute of Physics and
                                Engineering in Medicine <https://doi.org/10.1088/0031-9155/60/14/5471>`_.
                                Set this variable to false to replicate IBSI results.

    Returns:
        ndarray: Gray-Level Co-occurence Matrix of ``roi_only``.

    References:
        [1] Haralick, R. M., Shanmugam, K., & Dinstein, I. (1973). Textural \
        features for image classification. IEEE Transactions on Systems, \
        Man and Cybernetics, smc 3(6), 610–621.
    """
    # PARSING "dist_correction" ARGUMENT
    if type(dist_correction) is not bool:
        # The user did not input either "true" or "false",
        # so the default behavior is used.
        dist_correction = True

    # PRELIMINARY
    roi_only = roi_only.copy()
    level_temp = np.max(levels)+1
    roi_only[np.isnan(roi_only)] = level_temp
    #levels = np.append(levels, level_temp)
    dim = np.shape(roi_only)

    if np.ndim(roi_only) == 2:
        dim[2] = 1

    q2 = np.reshape(roi_only, (1, np.prod(dim)))

    # QUANTIZATION EFFECTS CORRECTION (M. Vallieres)
    # In case (for example) we initially wanted to have 64 levels, but due to
    # quantization, only 60 resulted.
    # qs = round(levels*adjust)/adjust;
    # q2 = round(q2*adjust)/adjust;

    #qs = levels
    qs = levels.tolist() + [level_temp]
    lqs = np.size(qs)

    q3 = q2*0
    for k in range(0, lqs):
        q3[q2 == qs[k]] = k

    q3 = np.reshape(q3, dim).astype(int)
    GLCM = np.zeros((lqs, lqs))

    for i in range(1, dim[0]+1):
        i_min = max(1, i-1)
        i_max = min(i+1, dim[0])
        for j in range(1, dim[1]+1):
            j_min = max(1, j-1)
            j_max = min(j+1, dim[1])
            for k in range(1, dim[2]+1):
                k_min = max(1, k-1)
                k_max = min(k+1, dim[2])
                val_q3 = q3[i-1, j-1, k-1]
                for I2 in range(i_min, i_max+1):
                    for J2 in range(j_min, j_max+1):
                        for K2 in range(k_min, k_max+1):
                            if (I2 == i) & (J2 == j) & (K2 == k):
                                continue
                            else:
                                val_neighbor = q3[I2-1, J2-1, K2-1]
                                if dist_correction:
                                    # Discretization length correction
                                    GLCM[val_q3, val_neighbor] += \
                                        np.sqrt(abs(I2-i)+abs(J2-j)+abs(K2-k))
                                else:
                                    GLCM[val_q3, val_neighbor] += 1

    GLCM = GLCM[0:-1, 0:-1]

    return GLCM



[docs]
def joint_max(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes joint maximum features.
    This feature refers to "Fcm_joint_max" (ID = GYBY) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]:: List or float of the joint maximum feature(s)
    """
    temp = []
    joint_max = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.max(df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, joint max: {sum(temp) / len(temp)}')
            joint_max.append(sum(temp) / len(temp))
    return joint_max



[docs]
def joint_avg(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes joint  average features.
    This feature refers to "Fcm_joint_avg" (ID = 60VM) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]:: List or float of the joint  average feature(s)
    """
    temp = []
    joint_avg = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.i * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, joint avg: {sum(temp) / len(temp)}')
            joint_avg.append(sum(temp) / len(temp))
    return joint_avg



[docs]
def joint_var(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes joint variance features.
    This feature refers to "Fcm_var" (ID = UR99) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: List or float of the joint variance feature(s)
    """
    temp = []
    joint_var = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pij.i * df_pij.pij)
            temp.append(np.sum((df_pij.i - m_u) ** 2.0 * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, joint var: {sum(temp) / len(temp)}')
            joint_var.append(sum(temp) / len(temp))
    return joint_var



[docs]
def joint_entr(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes joint entropy features.
    This feature refers to "Fcm_joint_entr" (ID = TU9B) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the joint entropy features
    """
    temp = []
    joint_entr = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(-np.sum(df_pij.pij * np.log2(df_pij.pij)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, joint entr: {sum(temp) / len(temp)}')
            joint_entr.append(sum(temp) / len(temp))
    return joint_entr



[docs]
def diff_avg(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes difference average features.
    This feature refers to "Fcm_diff_avg" (ID = TF7R) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the difference average features
    """
    temp = []
    diff_avg = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, df_pimj, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pimj.k * df_pimj.pimj))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, diff avg: {sum(temp) / len(temp)}')
            diff_avg.append(sum(temp) / len(temp))
    return diff_avg



[docs]
def diff_var(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes difference variance features.
    This feature refers to "Fcm_diff_var" (ID = D3YU) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the difference variance features
    """
    temp = []
    diff_var = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, df_pimj, _, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pimj.k * df_pimj.pimj)
            temp.append(np.sum((df_pimj.k - m_u) ** 2.0 * df_pimj.pimj))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, diff var: {sum(temp) / len(temp)}')
            diff_var.append(sum(temp) / len(temp))
    return diff_var



[docs]
def diff_entr(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes difference entropy features.
    This feature refers to "Fcm_diff_entr" (ID = NTRS) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the difference entropy features
    """
    temp = []
    diff_entr = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, df_pimj, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(-np.sum(df_pimj.pimj * np.log2(df_pimj.pimj)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, diff entr: {sum(temp) / len(temp)}')
            diff_entr.append(sum(temp) / len(temp))
    return diff_entr



[docs]
def sum_avg(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes sum average features.
    This feature refers to "Fcm_sum_avg" (ID = ZGXS) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the sum average features
    """
    temp = []
    sum_avg = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, _, df_pipj, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pipj.k * df_pipj.pipj))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, sum avg: {sum(temp) / len(temp)}')
            sum_avg.append(sum(temp) / len(temp))
    return sum_avg



[docs]
def sum_var(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes sum variance features.
    This feature refers to "Fcm_sum_var" (ID = OEEB) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the sum variance features
    """
    temp = []
    sum_var = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, _, df_pipj, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pipj.k * df_pipj.pipj)
            temp.append(np.sum((df_pipj.k - m_u) ** 2.0 * df_pipj.pipj))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, sum var: {sum(temp) / len(temp)}')
            sum_var.append(sum(temp) / len(temp))
    return sum_var



[docs]
def sum_entr(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes sum entropy features.
    This feature refers to "Fcm_sum_entr" (ID = P6QZ) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the sum entropy features
    """
    temp = []
    sum_entr = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            _, _, _, _, df_pipj, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(-np.sum(df_pipj.pipj * np.log2(df_pipj.pipj)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, sum entr: {sum(temp) / len(temp)}')
            sum_entr.append(sum(temp) / len(temp))
    return sum_entr



[docs]
def energy(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes angular second moment features.
    This feature refers to "Fcm_energy" (ID = 8ZQL) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the angular second moment features
    """
    temp = []
    energy = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.pij ** 2.0))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, energy: {sum(temp) / len(temp)}')
            energy.append(sum(temp) / len(temp))
    return energy



[docs]
def contrast(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes constrast features.
    This feature refers to "Fcm_contrast" (ID = ACUI) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the contrast features
    """
    temp = []
    contrast = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum((df_pij.i - df_pij.j) ** 2.0 * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, contrast: {sum(temp) / len(temp)}')
            contrast.append(sum(temp) / len(temp))
    return contrast



[docs]
def dissimilarity(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes dissimilarity features.
    This feature refers to "Fcm_dissimilarity" (ID = 8S9J) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the dissimilarity features
    """
    temp = []
    dissimilarity = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(np.abs(df_pij.i - df_pij.j) * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, dissimilarity: {sum(temp) / len(temp)}')
            dissimilarity.append(sum(temp) / len(temp))
    return dissimilarity



[docs]
def inv_diff(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes inverse difference features.
    This feature refers to "Fcm_inv_diff" (ID = IB1Z) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the inverse difference features
    """
    temp = []
    inv_diff = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.pij / (1.0 + np.abs(df_pij.i - df_pij.j))))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, inv diff: {sum(temp) / len(temp)}')
            inv_diff.append(sum(temp) / len(temp))
    return inv_diff



[docs]
def inv_diff_norm(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes inverse difference normalized features.
    This feature refers to "Fcm_inv_diff_norm" (ID = NDRX) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the inverse difference normalized features
    """
    temp = []
    inv_diff_norm = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, n_g = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.pij / (1.0 + np.abs(df_pij.i - df_pij.j) / n_g)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, inv diff norm: {sum(temp) / len(temp)}')
            inv_diff_norm.append(sum(temp) / len(temp))
    return inv_diff_norm



[docs]
def inv_diff_mom(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes inverse difference moment features.
    This feature refers to "Fcm_inv_diff_mom" (ID = WF0Z) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the inverse difference moment features
    """
    temp = []
    inv_diff_mom = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.pij / (1.0 + (df_pij.i - df_pij.j) ** 2.0)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, inv diff mom: {sum(temp) / len(temp)}')
            inv_diff_mom.append(sum(temp) / len(temp))
    return inv_diff_mom



[docs]
def inv_diff_mom_norm(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes inverse difference moment normalized features.
    This feature refers to "Fcm_inv_diff_mom_norm" (ID = 1QCO) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the inverse difference moment normalized features
    """
    temp = []
    inv_diff_mom_norm = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, n_g = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.pij / (1.0 + (df_pij.i - df_pij.j)** 2.0 / n_g ** 2.0)))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, inv diff mom norm: {sum(temp) / len(temp)}')
            inv_diff_mom_norm.append(sum(temp) / len(temp))
    return inv_diff_mom_norm



[docs]
def inv_var(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes inverse variance features.
    This feature refers to "Fcm_inv_var" (ID = E8JP) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the inverse variance features
    """
    temp = []
    inv_var = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            mu_marg = np.sum(df_pi.i * df_pi.pi)
            var_marg = np.sum((df_pi.i - mu_marg) ** 2.0 * df_pi.pi)
            if var_marg == 0.0:
                temp.append(1.0)
            else:
                temp.append(1.0 / var_marg * (np.sum(df_pij.i * df_pij.j * df_pij.pij) - mu_marg ** 2.0))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, inv var: {sum(temp) / len(temp)}')
            inv_var.append(sum(temp) / len(temp))
    return inv_var



[docs]
def corr(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes correlation features.
    This feature refers to "Fcm_corr" (ID = NI2N) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the correlation features
    """
    temp = []
    corr = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            mu_marg = np.sum(df_pi.i * df_pi.pi)
            var_marg = np.sum((df_pi.i - mu_marg) ** 2.0 * df_pi.pi)
            if var_marg == 0.0:
                temp.append(1.0)
            else:
                temp.append(1.0 / var_marg * (np.sum(df_pij.i * df_pij.j * df_pij.pij) - mu_marg ** 2.0))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, corr: {sum(temp) / len(temp)}')
            corr.append(sum(temp) / len(temp))
    return corr




[docs]
def auto_corr(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes autocorrelation features.
    This feature refers to "Fcm_auto_corr" (ID = QWB0) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the autocorrelation features
    """
    temp = []
    auto_corr = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, _, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            temp.append(np.sum(df_pij.i * df_pij.j * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, auto corr: {sum(temp) / len(temp)}')
            auto_corr.append(sum(temp) / len(temp))
    return auto_corr



[docs]
def info_corr1(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes information correlation 1 features.
    This feature refers to "Fcm_info_corr1" (ID = R8DG) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the information correlation 1 features
    """
    temp = []
    info_corr1 = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            hxy = -np.sum(df_pij.pij * np.log2(df_pij.pij))
            hxy_1 = -np.sum(df_pij.pij * np.log2(df_pij.pi * df_pij.pj))
            hx = -np.sum(df_pi.pi * np.log2(df_pi.pi))
            if len(df_pij) == 1 or hx == 0.0:
                temp.append(1.0)
            else:
                temp.append((hxy - hxy_1) / hx)
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, info corr 1: {sum(temp) / len(temp)}')
            info_corr1.append(sum(temp) / len(temp))
    return info_corr1



[docs]
def info_corr2(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes information correlation 2 features - Note: iteration over combinations of i and j
    This feature refers to "Fcm_info_corr2" (ID = JN9H) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the information correlation 2 features
    """
    temp = []
    info_corr2 = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, df_pj, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            hxy = - np.sum(df_pij.pij * np.log2(df_pij.pij))
            hxy_2 = - np.sum(
                        np.tile(df_pi.pi, len(df_pj)) * np.repeat(df_pj.pj, len(df_pi)) * \
                        np.log2(np.tile(df_pi.pi, len(df_pj)) * np.repeat(df_pj.pj, len(df_pi)))
                        )
            if hxy_2 < hxy:
                temp.append(0)
            else:
                temp.append(np.sqrt(1 - np.exp(-2.0 * (hxy_2 - hxy))))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, info corr 2: {sum(temp) / len(temp)}')
            info_corr2.append(sum(temp) / len(temp))
    return info_corr2



[docs]
def clust_tend(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes cluster tendency features.
    This feature refers to "Fcm_clust_tend" (ID = DG8W) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the cluster tendency features
    """
    temp = []
    clust_tend = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pi.i * df_pi.pi)
            temp.append(np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 2.0 * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, clust tend: {sum(temp) / len(temp)}')
            clust_tend.append(sum(temp) / len(temp))
    return clust_tend



[docs]
def clust_shade(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes cluster shade features.
    This feature refers to "Fcm_clust_shade" (ID = 7NFM) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the cluster shade features
    """
    temp = []
    clust_shade = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pi.i * df_pi.pi)
            temp.append(np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 3.0 * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, clust shade: {sum(temp) / len(temp)}')
            clust_shade.append(sum(temp) / len(temp))
    return clust_shade



[docs]
def clust_prom(glcm_dict: Dict) -> Union[float, List[float]]:
    """Computes cluster prominence features.
    This feature refers to "Fcm_clust_prom" (ID = AE86) in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        glcm_dict (Dict): dictionary with glcm matrices, generated using :func:`get_glcm_matrices`

    Returns:
        Union[float, List[float]]: the cluster prominence features
    """
    temp = []
    clust_prom = []
    for key in glcm_dict.keys():
        for glcm in glcm_dict[key]:
            df_pij, df_pi, _, _, _, _ = glcm.get_cm_data([np.nan, np.nan])
            m_u = np.sum(df_pi.i * df_pi.pi)
            temp.append(np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 4.0 * df_pij.pij))
        if len(glcm_dict) <= 1:
            return sum(temp) / len(temp)
        else:
            print(f'Merge method: {key}, clust prom: {sum(temp) / len(temp)}')
            clust_prom.append(sum(temp) / len(temp))
    return clust_prom



[docs]
def extract_all(vol, dist_correction=None, merge_method="vol_merge") -> Dict:
    """Computes glcm features.
    This features refer to Glcm family in the `IBSI1 reference \
    manual <https://arxiv.org/pdf/1612.07003.pdf>`__.

    Args:
        vol (ndarray): 3D volume, isotropically resampled, quantized
                       (e.g. n_g = 32, levels = [1, ..., n_g]), with NaNs outside the region
                       of interest.
        dist_correction (Union[bool, str], optional): Set this variable to true in order to use
                                                      discretization length difference corrections as used by the `Institute of Physics and
                                                      Engineering in Medicine <https://doi.org/10.1088/0031-9155/60/14/5471>`__.
                                                      Set this variable to false to replicate IBSI results.
                                                      Or use string and specify the norm for distance weighting. Weighting is
                                                      only performed if this argument is "manhattan", "euclidean" or "chebyshev".
        merge_method (str, optional): merging ``method`` which determines how features are
                                           calculated. One of "average", "slice_merge", "dir_merge" and "vol_merge".
                                           Note that not all combinations of spatial and merge ``method`` are valid.
        method (str, optional): Either 'old' (deprecated) or 'new' (faster) ``method``.

    Returns:
        Dict: Dict of the glcm features.

    Raises:
        ValueError: If `method` is not 'old' or 'new'.

    Todo:

        - Enable calculation of CM features using different spatial methods (2d, 2.5d, 3d)
        - Enable calculation of CM features using different CM distance settings
        - Enable calculation of CM features for different merge methods ("average", "slice_merge", "dir_merge" and "vol_merge")
        - Provide the range of discretised intensities from a calling function and pass to :func:`get_cm_features`.
        - Test if dist_correction works as expected.

    """
    glcm = get_cm_features(
                        vol=vol,
                        intensity_range=[np.nan, np.nan],
                        merge_method=merge_method,
                        dist_weight_norm=dist_correction
                        )

    return glcm



[docs]
def get_glcm_matrices(vol,
                    glcm_spatial_method="3d",
                    glcm_dist=1.0,
                    merge_method="vol_merge",
                    dist_weight_norm=None) -> Dict:
    """Extracts co-occurrence matrices from the intensity roi mask prior to features extraction.

    Note:
        This code was adapted from the in-house radiomics software created at
        OncoRay, Dresden, Germany.

    Args:
        vol (ndarray): volume with discretised intensities as 3D numpy array (x, y, z).
        intensity_range (ndarray): range of potential discretised intensities,provided as a list: 
            [minimal discretised intensity, maximal discretised intensity]. 
            If one or both values are unknown, replace the respective values with np.nan.
        glcm_spatial_method (str, optional): spatial method which determines the way
            co-occurrence matrices are calculated and how features are determined.
            Must be "2d", "2.5d" or "3d".
        glcm_dist (float, optional): Chebyshev distance for comparison between neighboring voxels.
        merge_method (str, optional): merging method which determines how features are
            calculated. One of "average", "slice_merge", "dir_merge" and "vol_merge".
            Note that not all combinations of spatial and merge method are valid.
        dist_weight_norm (Union[bool, str], optional): norm for distance weighting. Weighting is only
            performed if this argument is either "manhattan","euclidean", "chebyshev" or bool.

    Returns:
        Dict: Dict of co-occurrence matrices.

    Raises:
        ValueError: If `glcm_spatial_method` is not "2d", "2.5d" or "3d".
    """
    if type(glcm_spatial_method) is not list:
        glcm_spatial_method = [glcm_spatial_method]

    if type(glcm_dist) is not list:
        glcm_dist = [glcm_dist]

    if type(merge_method) is not list:
        merge_method = [merge_method]

    if type(dist_weight_norm) is bool:
        if dist_weight_norm:
            dist_weight_norm = "euclidean"

    # Get the roi in tabular format
    img_dims = vol.shape
    index_id = np.arange(start=0, stop=vol.size)
    coords = np.unravel_index(indices=index_id, shape=img_dims)
    df_img = pd.DataFrame({"index_id": index_id,
                           "g": np.ravel(vol),
                           "x": coords[0],
                           "y": coords[1],
                           "z": coords[2],
                           "roi_int_mask": np.ravel(np.isfinite(vol))})

    # Iterate over spatial arrangements
    for ii_spatial in glcm_spatial_method:
        # Iterate over distances
        for ii_dist in glcm_dist:
            # Initiate list of glcm objects
            glcm_list = []
            # Perform 2D analysis
            if ii_spatial.lower() in ["2d", "2.5d"]:
                # Iterate over slices
                for ii_slice in np.arange(0, img_dims[2]):
                    # Get neighbour direction and iterate over neighbours
                    nbrs = get_neighbour_direction(
                        d=1,
                        distance="chebyshev",
                        centre=False,
                        complete=False,
                        dim3=False) * int(ii_dist)
                    for ii_direction in np.arange(0, np.shape(nbrs)[1]):
                        # Add glcm matrices to list
                        glcm_list += [CooccurrenceMatrix(distance=int(ii_dist),
                                                        direction=nbrs[:, ii_direction],
                                                        direction_id=ii_direction,
                                                        spatial_method=ii_spatial.lower(),
                                                        img_slice=ii_slice)]

            # Perform 3D analysis
            elif ii_spatial.lower() == "3d":
                # Get neighbour direction and iterate over neighbours
                nbrs = get_neighbour_direction(d=1,
                                            distance="chebyshev",
                                            centre=False,
                                            complete=False,
                                            dim3=True) * int(ii_dist)

                for ii_direction in np.arange(0, np.shape(nbrs)[1]):
                    # Add glcm matrices to list
                    glcm_list += [CooccurrenceMatrix(distance=int(ii_dist), 
                                                    direction=nbrs[:, ii_direction], 
                                                    direction_id=ii_direction,
                                                    spatial_method=ii_spatial.lower())]

            else:
                raise ValueError(
                    "GCLM matrices can be determined in \"2d\", \"2.5d\" and \"3d\". \
                        The requested method (%s) is not implemented.", ii_spatial)

            # Calculate glcm matrices
            for glcm in glcm_list:
                glcm.calculate_cm_matrix(
                    df_img=df_img, img_dims=img_dims, dist_weight_norm=dist_weight_norm)

            # Merge matrices according to the given method
            upd_list = {}
            for merge_method in merge_method:
                upd_list[merge_method] = combine_matrices(
                    glcm_list=glcm_list, merge_method=merge_method, spatial_method=ii_spatial.lower())

                # Skip if no matrices are available (due to illegal combinations of merge and spatial methods
                if upd_list is None:
                    continue
    return upd_list



[docs]
def get_cm_features(vol,
                    intensity_range,
                    glcm_spatial_method="3d",
                    glcm_dist=1.0,
                    merge_method="vol_merge",
                    dist_weight_norm=None) -> Dict:
    """Extracts co-occurrence matrix-based features from the intensity roi mask.

    Note:
        This code was adapted from the in-house radiomics software created at
        OncoRay, Dresden, Germany.

    Args:
        vol (ndarray): volume with discretised intensities as 3D numpy array (x, y, z).
        intensity_range (ndarray): range of potential discretised intensities,
                                   provided as a list: [minimal discretised intensity, maximal discretised
                                   intensity]. If one or both values are unknown, replace the respective values
                                   with np.nan.
        glcm_spatial_method (str, optional): spatial method which determines the way
                                             co-occurrence matrices are calculated and how features are determined.
                                             MUST BE "2d", "2.5d" or "3d".
        glcm_dist (float, optional): chebyshev distance for comparison between neighbouring
                                     voxels.
        merge_method (str, optional): merging method which determines how features are
                                           calculated. One of "average", "slice_merge", "dir_merge" and "vol_merge".
                                           Note that not all combinations of spatial and merge method are valid.
        dist_weight_norm (Union[bool, str], optional): norm for distance weighting. Weighting is only
                                                       performed if this argument is either "manhattan",
                                                       "euclidean", "chebyshev" or bool.

    Returns:
        Dict: Dict of the glcm features.

    Raises:
        ValueError: If `glcm_spatial_method` is not "2d", "2.5d" or "3d".
    """
    if type(glcm_spatial_method) is not list:
        glcm_spatial_method = [glcm_spatial_method]

    if type(glcm_dist) is not list:
        glcm_dist = [glcm_dist]

    if type(merge_method) is not list:
        merge_method = [merge_method]

    if type(dist_weight_norm) is bool:
        if dist_weight_norm:
            dist_weight_norm = "euclidean"

    # Get the roi in tabular format
    img_dims = vol.shape
    index_id = np.arange(start=0, stop=vol.size)
    coords = np.unravel_index(indices=index_id, shape=img_dims)
    df_img = pd.DataFrame({"index_id": index_id,
                           "g": np.ravel(vol),
                           "x": coords[0],
                           "y": coords[1],
                           "z": coords[2],
                           "roi_int_mask": np.ravel(np.isfinite(vol))})

    # Generate an empty feature list
    feat_list = []

    # Iterate over spatial arrangements
    for ii_spatial in glcm_spatial_method:
        # Iterate over distances
        for ii_dist in glcm_dist:
            # Initiate list of glcm objects
            glcm_list = []
            # Perform 2D analysis
            if ii_spatial.lower() in ["2d", "2.5d"]:
                # Iterate over slices
                for ii_slice in np.arange(0, img_dims[2]):
                    # Get neighbour direction and iterate over neighbours
                    nbrs = get_neighbour_direction(
                        d=1,
                        distance="chebyshev",
                        centre=False,
                        complete=False,
                        dim3=False) * int(ii_dist)
                    for ii_direction in np.arange(0, np.shape(nbrs)[1]):
                        # Add glcm matrices to list
                        glcm_list += [CooccurrenceMatrix(distance=int(ii_dist),
                                                        direction=nbrs[:, ii_direction],
                                                        direction_id=ii_direction,
                                                        spatial_method=ii_spatial.lower(),
                                                        img_slice=ii_slice)]

            # Perform 3D analysis
            elif ii_spatial.lower() == "3d":
                # Get neighbour direction and iterate over neighbours
                nbrs = get_neighbour_direction(d=1,
                                            distance="chebyshev",
                                            centre=False,
                                            complete=False,
                                            dim3=True) * int(ii_dist)

                for ii_direction in np.arange(0, np.shape(nbrs)[1]):
                    # Add glcm matrices to list
                    glcm_list += [CooccurrenceMatrix(distance=int(ii_dist), 
                                                    direction=nbrs[:, ii_direction], 
                                                    direction_id=ii_direction,
                                                    spatial_method=ii_spatial.lower())]

            else:
                raise ValueError(
                    "GCLM matrices can be determined in \"2d\", \"2.5d\" and \"3d\". \
                        The requested method (%s) is not implemented.", ii_spatial)

            # Calculate glcm matrices
            for glcm in glcm_list:
                glcm.calculate_cm_matrix(
                    df_img=df_img, img_dims=img_dims, dist_weight_norm=dist_weight_norm)

            # Merge matrices according to the given method
            for merge_method in merge_method:
                upd_list = combine_matrices(
                    glcm_list=glcm_list, merge_method=merge_method, spatial_method=ii_spatial.lower())

                # Skip if no matrices are available (due to illegal combinations of merge and spatial methods
                if upd_list is None:
                    continue

                # Calculate features
                feat_run_list = []
                for glcm in upd_list:
                    feat_run_list += [glcm.calculate_cm_features(
                        intensity_range=intensity_range)]

                # Average feature values
                feat_list += [pd.concat(feat_run_list, axis=0).mean(axis=0, skipna=True).to_frame().transpose()]

    # Merge feature tables into a single table and return as a dictionary
    df_feat = pd.concat(feat_list, axis=1).to_dict(orient="records")[0]

    return df_feat



[docs]
def combine_matrices(glcm_list: List, merge_method: str, spatial_method: str) -> List:
    """Merges co-occurrence matrices prior to feature calculation.

    Note:
        This code was adapted from the in-house radiomics software created at
        OncoRay, Dresden, Germany.

    Args:
        glcm_list (List): List of CooccurrenceMatrix objects.
        merge_method (str): Merging method which determines how features are calculated.
                            One of "average", "slice_merge", "dir_merge" and "vol_merge". Note that not all
                            combinations of spatial and merge method are valid.
        spatial_method (str): spatial method which determines the way co-occurrence
                              matrices are calculated and how features are determined. One of "2d", "2.5d"
                              or "3d".

    Returns:
        List[CooccurrenceMatrix]: list of one or more merged CooccurrenceMatrix objects.
    """
    # Initiate empty list
    use_list = []

    # For average features over direction, maintain original glcms
    if merge_method == "average" and spatial_method in ["2d", "3d"]:
        # Make copy of glcm_list
        for glcm in glcm_list:
            use_list += [glcm._copy()]

        # Set merge method to average
        for glcm in use_list:
            glcm.merge_method = "average"

    # Merge glcms by slice
    elif merge_method == "slice_merge" and spatial_method == "2d":
        # Find slice_ids
        slice_id = []
        for glcm in glcm_list:
            slice_id += [glcm.slice]

        # Iterate over unique slice_ids
        for ii_slice in np.unique(slice_id):
            slice_glcm_id = np.squeeze(np.where(slice_id == ii_slice))

            # Select all matrices within the slice
            sel_matrix_list = []
            for glcm_id in slice_glcm_id:
                sel_matrix_list += [glcm_list[glcm_id].matrix]

            # Check if any matrix has been created for the currently selected slice
            if is_list_all_none(sel_matrix_list):
                # No matrix was created
                use_list += [CooccurrenceMatrix(distance=glcm_list[slice_glcm_id[0]].distance,
                                                direction=None,
                                                direction_id=None,
                                                spatial_method=spatial_method,
                                                img_slice=ii_slice,
                                                merge_method=merge_method,
                                                matrix=None,
                                                n_v=0.0)]
            else:
                # Merge matrices within the slice
                merge_cm = pd.concat(sel_matrix_list, axis=0)
                merge_cm = merge_cm.groupby(by=["i", "j"]).sum().reset_index()

                # Update the number of voxels within the merged slice
                merge_n_v = 0.0
                for glcm_id in slice_glcm_id:
                    merge_n_v += glcm_list[glcm_id].n_v

                # Create new cooccurrence matrix
                use_list += [CooccurrenceMatrix(distance=glcm_list[slice_glcm_id[0]].distance,
                                                direction=None,
                                                direction_id=None,
                                                spatial_method=spatial_method,
                                                img_slice=ii_slice,
                                                merge_method=merge_method,
                                                matrix=merge_cm,
                                                n_v=merge_n_v)]

    # Merge glcms by direction
    elif merge_method == "dir_merge" and spatial_method == "2.5d":
        # Find slice_ids
        dir_id = []
        for glcm in glcm_list:
            dir_id += [glcm.direction_id]

        # Iterate over unique directions
        for ii_dir in np.unique(dir_id):
            dir_glcm_id = np.squeeze(np.where(dir_id == ii_dir))

            # Select all matrices with the same direction
            sel_matrix_list = []
            for glcm_id in dir_glcm_id:
                sel_matrix_list += [glcm_list[glcm_id].matrix]

            # Check if any matrix has been created for the currently selected direction
            if is_list_all_none(sel_matrix_list):
                # No matrix was created
                use_list += [CooccurrenceMatrix(distance=glcm_list[dir_glcm_id[0]].distance,
                                                direction=glcm_list[dir_glcm_id[0]].direction,
                                                direction_id=ii_dir,
                                                spatial_method=spatial_method,
                                                img_slice=None,
                                                merge_method=merge_method,
                                                matrix=None, n_v=0.0)]
            else:
                # Merge matrices with the same direction
                merge_cm = pd.concat(sel_matrix_list, axis=0)
                merge_cm = merge_cm.groupby(by=["i", "j"]).sum().reset_index()

                # Update the number of voxels for the merged matrices with the same direction
                merge_n_v = 0.0
                for glcm_id in dir_glcm_id:
                    merge_n_v += glcm_list[glcm_id].n_v

                # Create new co-occurrence matrix
                use_list += [CooccurrenceMatrix(distance=glcm_list[dir_glcm_id[0]].distance,
                                                direction=glcm_list[dir_glcm_id[0]].direction,
                                                direction_id=ii_dir,
                                                spatial_method=spatial_method,
                                                img_slice=None,
                                                merge_method=merge_method,
                                                matrix=merge_cm,
                                                n_v=merge_n_v)]

    # Merge all glcms into a single representation
    elif merge_method == "vol_merge" and spatial_method in ["2.5d", "3d"]:
        # Select all matrices within the slice
        sel_matrix_list = []
        for glcm_id in np.arange(len(glcm_list)):
            sel_matrix_list += [glcm_list[glcm_id].matrix]

        # Check if any matrix was created
        if is_list_all_none(sel_matrix_list):
            # In case no matrix was created
            use_list += [CooccurrenceMatrix(distance=glcm_list[0].distance,
                                            direction=None,
                                            direction_id=None,
                                            spatial_method=spatial_method,
                                            img_slice=None,
                                            merge_method=merge_method,
                                            matrix=None,
                                            n_v=0.0)]
        else:
            # Merge co-occurrence matrices
            merge_cm = pd.concat(sel_matrix_list, axis=0)
            merge_cm = merge_cm.groupby(by=["i", "j"]).sum().reset_index()

            # Update the number of voxels
            merge_n_v = 0.0
            for glcm_id in np.arange(len(glcm_list)):
                merge_n_v += glcm_list[glcm_id].n_v

            # Create new co-occurrence matrix
            use_list += [CooccurrenceMatrix(distance=glcm_list[0].distance,
                                            direction=None,
                                            direction_id=None,
                                            spatial_method=spatial_method,
                                            img_slice=None,
                                            merge_method=merge_method,
                                            matrix=merge_cm,
                                            n_v=merge_n_v)]
    else:
        use_list = None

    return use_list




[docs]
class CooccurrenceMatrix:
    """ Class that contains a single co-occurrence ``matrix``.

    Note:
        Code was adapted from the in-house radiomics software created at
        OncoRay, Dresden, Germany.

    Attributes:
        distance (int): Chebyshev ``distance``.
        direction (ndarray): Direction along which neighbouring voxels are found.
        direction_id (int): Direction index to identify unique ``direction`` vectors.
        spatial_method (str): Spatial method used to calculate the co-occurrence
                              ``matrix``: "2d", "2.5d" or "3d".
        img_slice (ndarray): Corresponding slice index (only if the co-occurrence
                             ``matrix`` corresponds to a 2d image slice).
        merge_method (str): Method for merging the co-occurrence ``matrix`` with other
                            co-occurrence matrices.
        matrix (pandas.DataFrame): The actual co-occurrence ``matrix`` in sparse format
                                   (row, column, count).
        n_v (int): The number of voxels in the volume.
    """


[docs]
    def __init__(self,
                distance: int,
                direction: np.ndarray,
                direction_id: int,
                spatial_method: str,
                img_slice: np.ndarray=None,
                merge_method: str=None,
                matrix: pd.DataFrame=None,
                n_v: int=None) -> None:
        """Constructor of the CooccurrenceMatrix class

        Args:
            distance (int): Chebyshev ``distance``.
            direction (ndarray): Direction along which neighbouring voxels are found.
            direction_id (int): Direction index to identify unique ``direction`` vectors.
            spatial_method (str): Spatial method used to calculate the co-occurrence
                                ``matrix``: "2d", "2.5d" or "3d".
            img_slice (ndarray, optional): Corresponding slice index (only if the
                                        co-occurrence ``matrix`` corresponds to a 2d image slice).
            merge_method (str, optional): Method for merging the co-occurrence ``matrix``
                                        with other co-occurrence matrices.
            matrix (pandas.DataFrame, optional): The actual co-occurrence ``matrix`` in
                                                sparse format (row, column, count).
            n_v (int, optional): The number of voxels in the volume.
        
        Returns:
            None.
        """
        # Distance used
        self.distance = distance

        # Direction and slice for which the current matrix is extracted
        self.direction = direction
        self.direction_id = direction_id
        self.img_slice = img_slice

        # Spatial analysis method (2d, 2.5d, 3d) and merge method (average, slice_merge, dir_merge, vol_merge)
        self.spatial_method = spatial_method
        self.merge_method = merge_method

        # Place holders
        self.matrix = matrix
        self.n_v = n_v



[docs]
    def _copy(self):
        """
        Returns a copy of the co-occurrence matrix object.
        """
        return deepcopy(self)



[docs]
    def calculate_cm_matrix(self, df_img: pd.DataFrame, img_dims: np.ndarray, dist_weight_norm: str) -> None:
        """Function that calculates a co-occurrence matrix for the settings provided during
        initialisation and the input image.

        Args:
            df_img (pandas.DataFrame): Data table containing image intensities, x, y and z coordinates,
                and mask labels corresponding to voxels in the volume.
            img_dims (ndarray, List[float]): Dimensions of the image volume.
            dist_weight_norm (str): Norm for distance weighting. Weighting is only
                performed if this parameter is either "manhattan", "euclidean" or "chebyshev".

        Returns:
            None. Assigns the created image table (cm matrix) to the `matrix` attribute.

        Raises:
            ValueError:
                If `self.spatial_method` is not "2d", "2.5d" or "3d".
                Also, if ``dist_weight_norm`` is not "manhattan", "euclidean" or "chebyshev".

        """
        # Check if the roi contains any masked voxels. If this is not the case, don't construct the glcm.
        if not np.any(df_img.roi_int_mask):
            self.n_v = 0
            self.matrix = None

            return None

        # Create local copies of the image table
        if self.spatial_method == "3d":
            df_cm = deepcopy(df_img)
        elif self.spatial_method in ["2d", "2.5d"]:
            df_cm = deepcopy(df_img[df_img.z == self.img_slice])
            df_cm["index_id"] = np.arange(0, len(df_cm))
            df_cm["z"] = 0
            df_cm = df_cm.reset_index(drop=True)
        else:
            raise ValueError(
                "The spatial method for grey level co-occurrence matrices should be one of \"2d\", \"2.5d\" or \"3d\".")

        # Set grey level of voxels outside ROI to NaN
        df_cm.loc[df_cm.roi_int_mask == False, "g"] = np.nan

        # Determine potential transitions
        df_cm["to_index"] = coord2index(x=df_cm.x.values + self.direction[0],
                                        y=df_cm.y.values + self.direction[1],
                                        z=df_cm.z.values + self.direction[2],
                                        dims=img_dims)

        # Get grey levels from transitions
        df_cm["to_g"] = get_value(x=df_cm.g.values, index=df_cm.to_index.values)

        # Check if any transitions exist.
        if np.all(np.isnan(df_cm[["to_g"]])):
            self.n_v = 0
            self.matrix = None

            return None

        # Count occurrences of grey level transitions
        df_cm = df_cm.groupby(by=["g", "to_g"]).size().reset_index(name="n")

        # Append grey level transitions in opposite direction
        df_cm_inv = pd.DataFrame({"g": df_cm.to_g, "to_g": df_cm.g, "n": df_cm.n})
        df_cm = df_cm.append(df_cm_inv, ignore_index=True)

        # Sum occurrences of grey level transitions
        df_cm = df_cm.groupby(by=["g", "to_g"]).sum().reset_index()

        # Rename columns
        df_cm.columns = ["i", "j", "n"]

        if dist_weight_norm in ["manhattan", "euclidean", "chebyshev"]:
            if dist_weight_norm == "manhattan":
                weight = sum(abs(self.direction))
            elif dist_weight_norm == "euclidean":
                weight = np.sqrt(sum(np.power(self.direction, 2.0)))
            elif dist_weight_norm == "chebyshev":
                weight = np.max(abs(self.direction))
            df_cm.n /= weight

        # Set the number of voxels
        self.n_v = np.sum(df_cm.n)

        # Add matrix and number of voxels to object
        self.matrix = df_cm



[docs]
    def get_cm_data(self, intensity_range: np.ndarray):
        """Computes the probability distribution for the elements of the GLCM
        (diagonal probability, cross-diagonal probability...) and number of gray-levels.

        Args:
            intensity_range (ndarray): Range of potential discretised intensities, provided as a list:
                [minimal discretised intensity, maximal discretised intensity].
                If one or both values are unknown,replace the respective values with np.nan.

        Returns:
            Typle[pd.DataFrame, pd.DataFrame, pd.DataFrame, float]:
            - Occurence data frame
            - Diagonal probabilty
            - Cross-diagonal probabilty
            - Number of gray levels
        """
        # Occurrence data frames
        df_pij = deepcopy(self.matrix)
        df_pij["pij"] = df_pij.n / sum(df_pij.n)
        df_pi = df_pij.groupby(by="i")["pij"].agg(np.sum).reset_index().rename(columns={"pij": "pi"})
        df_pj = df_pij.groupby(by="j")["pij"].agg(np.sum).reset_index().rename(columns={"pij": "pj"})

        # Diagonal probilities p(i-j)
        df_pimj = deepcopy(df_pij)
        df_pimj["k"] = np.abs(df_pimj.i - df_pimj.j)
        df_pimj = df_pimj.groupby(by="k")["pij"].agg(np.sum).reset_index().rename(columns={"pij": "pimj"})

        # Cross-diagonal probabilities p(i+j)
        df_pipj = deepcopy(df_pij)
        df_pipj["k"] = df_pipj.i + df_pipj.j
        df_pipj = df_pipj.groupby(by="k")["pij"].agg(np.sum).reset_index().rename(columns={"pij": "pipj"})

        # Merger of df.p_ij, df.p_i and df.p_j
        df_pij = pd.merge(df_pij, df_pi, on="i")
        df_pij = pd.merge(df_pij, df_pj, on="j")

        # Constant definitions
        intensity_range_loc = deepcopy(intensity_range)
        if np.isnan(intensity_range[0]):
            intensity_range_loc[0] = np.min(df_pi.i) * 1.0
        if np.isnan(intensity_range[1]):
            intensity_range_loc[1] = np.max(df_pi.i) * 1.0
        # Number of grey levels
        n_g = intensity_range_loc[1] - intensity_range_loc[0] + 1.0

        return df_pij, df_pi, df_pj, df_pimj, df_pipj, n_g



[docs]
    def calculate_cm_features(self, intensity_range: np.ndarray) -> pd.DataFrame:
        """Wrapper to json.dump function.

        Args:
            intensity_range (np.ndarray): Range of potential discretised intensities, 
                provided as a list: [minimal discretised intensity, maximal discretised intensity].
                If one or both values are unknown,replace the respective values with np.nan.

        Returns:
            pandas.DataFrame: Data frame with values for each feature.
        """
        # Create feature table
        feat_names = ["Fcm_joint_max", "Fcm_joint_avg", "Fcm_joint_var", "Fcm_joint_entr",
                      "Fcm_diff_avg", "Fcm_diff_var", "Fcm_diff_entr",
                      "Fcm_sum_avg", "Fcm_sum_var", "Fcm_sum_entr",
                      "Fcm_energy", "Fcm_contrast", "Fcm_dissimilarity",
                      "Fcm_inv_diff", "Fcm_inv_diff_norm", "Fcm_inv_diff_mom",
                      "Fcm_inv_diff_mom_norm", "Fcm_inv_var", "Fcm_corr",
                      "Fcm_auto_corr", "Fcm_clust_tend", "Fcm_clust_shade",
                      "Fcm_clust_prom", "Fcm_info_corr1", "Fcm_info_corr2"]

        df_feat = pd.DataFrame(np.full(shape=(1, len(feat_names)), fill_value=np.nan))
        df_feat.columns = feat_names

        # Don't return data for empty slices or slices without a good matrix
        if self.matrix is None:
            # Update names
            #df_feat.columns += self._parse_names()
            return df_feat
        elif len(self.matrix) == 0:
            # Update names
            #df_feat.columns += self._parse_names()
            return df_feat

        df_pij, df_pi, df_pj, df_pimj, df_pipj, n_g = self.get_cm_data(intensity_range)

        ###############################################
        ######           glcm features           ######
        ###############################################
        # Joint maximum
        df_feat.loc[0, "Fcm_joint_max"] = np.max(df_pij.pij)

        # Joint average
        df_feat.loc[0, "Fcm_joint_avg"] = np.sum(df_pij.i * df_pij.pij)

        # Joint variance
        m_u = np.sum(df_pij.i * df_pij.pij)
        df_feat.loc[0, "Fcm_joint_var"] = np.sum((df_pij.i - m_u) ** 2.0 * df_pij.pij)

        # Joint entropy
        df_feat.loc[0, "Fcm_joint_entr"] = -np.sum(df_pij.pij * np.log2(df_pij.pij))

        # Difference average
        df_feat.loc[0, "Fcm_diff_avg"] = np.sum(df_pimj.k * df_pimj.pimj)

        # Difference variance
        m_u = np.sum(df_pimj.k * df_pimj.pimj)
        df_feat.loc[0, "Fcm_diff_var"] = np.sum((df_pimj.k - m_u) ** 2.0 * df_pimj.pimj)

        # Difference entropy
        df_feat.loc[0, "Fcm_diff_entr"] = -np.sum(df_pimj.pimj * np.log2(df_pimj.pimj))

        # Sum average
        df_feat.loc[0, "Fcm_sum_avg"] = np.sum(df_pipj.k * df_pipj.pipj)

        # Sum variance
        m_u = np.sum(df_pipj.k * df_pipj.pipj)
        df_feat.loc[0, "Fcm_sum_var"] = np.sum((df_pipj.k - m_u) ** 2.0 * df_pipj.pipj)

        # Sum entropy
        df_feat.loc[0, "Fcm_sum_entr"] = -np.sum(df_pipj.pipj * np.log2(df_pipj.pipj))

        # Angular second moment
        df_feat.loc[0, "Fcm_energy"] = np.sum(df_pij.pij ** 2.0)

        # Contrast
        df_feat.loc[0, "Fcm_contrast"] = np.sum((df_pij.i - df_pij.j) ** 2.0 * df_pij.pij)

        # Dissimilarity
        df_feat.loc[0, "Fcm_dissimilarity"] = np.sum(np.abs(df_pij.i - df_pij.j) * df_pij.pij)

        # Inverse difference
        df_feat.loc[0, "Fcm_inv_diff"] = np.sum(df_pij.pij / (1.0 + np.abs(df_pij.i - df_pij.j)))

        # Inverse difference normalised
        df_feat.loc[0, "Fcm_inv_diff_norm"] = np.sum(df_pij.pij / (1.0 + np.abs(df_pij.i - df_pij.j) / n_g))

        # Inverse difference moment
        df_feat.loc[0, "Fcm_inv_diff_mom"] = np.sum(df_pij.pij / (1.0 + (df_pij.i - df_pij.j) ** 2.0))

        # Inverse difference moment normalised
        df_feat.loc[0, "Fcm_inv_diff_mom_norm"] = np.sum(df_pij.pij / (1.0 + (df_pij.i - df_pij.j)
                                                  ** 2.0 / n_g ** 2.0))

        # Inverse variance
        df_sel = df_pij[df_pij.i != df_pij.j]
        df_feat.loc[0, "Fcm_inv_var"] = np.sum(df_sel.pij / (df_sel.i - df_sel.j) ** 2.0)
        del df_sel

        # Correlation
        mu_marg = np.sum(df_pi.i * df_pi.pi)
        var_marg = np.sum((df_pi.i - mu_marg) ** 2.0 * df_pi.pi)

        if var_marg == 0.0:
            df_feat.loc[0, "Fcm_corr"] = 1.0
        else:
            df_feat.loc[0, "Fcm_corr"] = 1.0 / var_marg * (np.sum(df_pij.i * df_pij.j * df_pij.pij) - mu_marg ** 2.0)

        del mu_marg, var_marg

        # Autocorrelation
        df_feat.loc[0, "Fcm_auto_corr"] = np.sum(df_pij.i * df_pij.j * df_pij.pij)

        # Information correlation 1
        hxy = -np.sum(df_pij.pij * np.log2(df_pij.pij))
        hxy_1 = -np.sum(df_pij.pij * np.log2(df_pij.pi * df_pij.pj))
        hx = -np.sum(df_pi.pi * np.log2(df_pi.pi))
        if len(df_pij) == 1 or hx == 0.0:
            df_feat.loc[0, "Fcm_info_corr1"] = 1.0
        else:
            df_feat.loc[0, "Fcm_info_corr1"] = (hxy - hxy_1) / hx
        del hxy, hxy_1, hx

        # Information correlation 2 - Note: iteration over combinations of i and j
        hxy = - np.sum(df_pij.pij * np.log2(df_pij.pij))
        hxy_2 = - np.sum(
                        np.tile(df_pi.pi, len(df_pj)) * np.repeat(df_pj.pj, len(df_pi)) * \
                        np.log2(np.tile(df_pi.pi, len(df_pj)) * np.repeat(df_pj.pj, len(df_pi)))
                        )

        if hxy_2 < hxy:
            df_feat.loc[0, "Fcm_info_corr2"] = 0
        else:
            df_feat.loc[0, "Fcm_info_corr2"] = np.sqrt(1 - np.exp(-2.0 * (hxy_2 - hxy)))
        del hxy, hxy_2

        # Cluster tendency
        m_u = np.sum(df_pi.i * df_pi.pi)
        df_feat.loc[0, "Fcm_clust_tend"] = np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 2.0 * df_pij.pij)
        del m_u

        # Cluster shade
        m_u = np.sum(df_pi.i * df_pi.pi)
        df_feat.loc[0, "Fcm_clust_shade"] = np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 3.0 * df_pij.pij)
        del m_u

        # Cluster prominence
        m_u = np.sum(df_pi.i * df_pi.pi)
        df_feat.loc[0, "Fcm_clust_prom"] = np.sum((df_pij.i + df_pij.j - 2 * m_u) ** 4.0 * df_pij.pij)

        del df_pi, df_pj, df_pij, df_pimj, df_pipj, n_g

        # Update names
        # df_feat.columns += self._parse_names()

        return df_feat



[docs]
    def _parse_names(self) -> str:
        """"Adds additional settings-related identifiers to each feature.
        Not used currently, as the use of different settings for the
        co-occurrence matrix is not supported.

        Returns:
            str: String of the features indetifier.
        """
        parse_str = ""

        # Add distance
        parse_str += "_d" + str(np.round(self.distance, 1))

        # Add spatial method
        if self.spatial_method is not None:
            parse_str += "_" + self.spatial_method

        # Add merge method
        if self.merge_method is not None:
            if self.merge_method == "average":
                parse_str += "_avg"
            if self.merge_method == "slice_merge":
                parse_str += "_s_mrg"
            if self.merge_method == "dir_merge":
                parse_str += "_d_mrg"
            if self.merge_method == "vol_merge":
                parse_str += "_v_mrg"

        return parse_str