Source code for MEDiml.utils.create_radiomics_table

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import random
from json import load
from pathlib import Path
from typing import Dict, List, Union

import numpy as np
import pandas as pd

from ..utils.get_patient_id_from_scan_name import get_patient_id_from_scan_name
from ..utils.initialize_features_names import initialize_features_names



[docs]
def create_radiomics_table(radiomics_files_paths: List, image_space: str, log_file: Union[str, Path]) -> Dict:
    """
    Gathers the radiomics features of several JSON files into a single table.

    Every file is loaded once. The feature names are taken from one randomly
    chosen file for which ``initialize_features_names`` returns a non-empty
    result, and each feature becomes one column (``radVar1``, ``radVar2``, ...)
    holding one value per file. A value that is missing from a file, or that
    is a string or a list, is stored as NaN.

    Args:
        radiomics_files_paths(List): List of paths to the radiomics JSON files.
        image_space(str): Key of the image space (top-level entry of each JSON file)
            that contains the extracted features.
        log_file(Union[str, Path]): Path to logging file. When falsy, the logging
            configuration is left untouched.

    Returns:
        Dict: Dictionary with two entries:
            - ``'Table'``: ``pandas.DataFrame`` with one row per file (indexed by
              patient ID) and one ``radVar<k>`` column per feature.
            - ``'Properties'``: dict with ``'UserData'`` (a ``'||'``-delimited string
              of ``radVar<k>:<type>__<feature>__<param>`` entries), ``'RowNames'``,
              ``'DimensionNames'`` and ``'VariableNames'``.

    Raises:
        ValueError: If ``radiomics_files_paths`` is empty, or if no file provides
            feature names for ``image_space``.
    """
    if log_file:
        # Setting up logging settings
        logging.basicConfig(filename=log_file, level=logging.DEBUG)

    # Accept both str and Path entries; ``.stem`` is needed further down.
    paths = [Path(path) for path in radiomics_files_paths]
    n_files = len(paths)
    if n_files == 0:
        # Without any file there is nothing to derive the feature names from
        # (the previous implementation looped forever in this situation).
        raise ValueError("radiomics_files_paths is empty: no radiomics table can be created.")

    # LOAD EVERY RADIOMICS FILE ONCE
    rad_structs = []
    patientID = []
    for path in paths:
        with open(path, "r") as fp:
            rad_structs.append(load(fp))
        patientID.append(get_patient_id_from_scan_name(path.stem))

    # INITIALIZE FEATURE NAMES
    logging.info(f"\nnFiles: {n_files}")
    non_text_cell, text_cell = _find_feature_names(rad_structs, image_space)

    # CREATE TABLE DATA
    features_name_dict = {}
    real_names = []

    # Non-texture features live directly under the image space.
    _append_feature_columns(features_name_dict, real_names, rad_structs, image_space, non_text_cell)
    # Texture features are nested one level deeper, under the 'texture' key.
    _append_feature_columns(features_name_dict, real_names, rad_structs, image_space, text_cell, ('texture',))

    # '||radVar1:<real name>||radVar2:<real name>||...' ('||' alone when empty).
    str_names = '||' + ''.join(entry + '||' for entry in real_names)

    radiomics_table_dict = {
        'Table': pd.DataFrame(features_name_dict, index=patientID),
        'Properties': {'UserData': str_names,
                       'RowNames': patientID,
                       'DimensionNames': ['PatientID', 'Variables'],
                       'VariableNames': list(features_name_dict)
                       }}

    return radiomics_table_dict


def _find_feature_names(rad_structs: List, image_space: str):
    """Return ``(non_text_cell, text_cell)`` from a randomly chosen usable file.

    The files are visited in a random order, each at most once, so the choice
    stays random as before but the search always terminates.
    """
    n_files = len(rad_structs)
    for idx in random.sample(range(n_files), n_files):
        try:
            # IMAGE SPACE STRUCTURE --> .morph, .locInt, ...,  .texture
            non_text_cell, text_cell = initialize_features_names(rad_structs[idx][image_space])
        except Exception:
            # Best effort: a file without this image space (or with an
            # unexpected layout) is skipped and another one is tried.
            logging.debug("Could not read feature names from file #%d", idx, exc_info=True)
            continue
        if len(non_text_cell) > 0 or len(text_cell) > 0:
            return non_text_cell, text_cell

    raise ValueError(
        f"No radiomics file provides feature names for image space '{image_space}'."
    )


def _append_feature_columns(columns: Dict, real_names: List, rad_structs: List,
                            image_space: str, cell: List, parent_keys: tuple = ()) -> None:
    """Add one ``radVar<k>`` column to ``columns`` per feature described by ``cell``.

    ``cell[0][i]`` is used as the first key below the image space, and
    ``cell[2][i]`` / ``cell[1][i]`` list the second- and third-level keys for
    that entry (presumably type, parameter set and feature names -- the exact
    meaning is defined by ``initialize_features_names``).
    """
    if len(cell) == 0:
        # One of the two name structures may legitimately be empty.
        return

    type_names, feature_names, param_names = cell[0], cell[1], cell[2]
    for im_type, type_name in enumerate(type_names):
        for param_name in param_names[im_type]:
            for feat_name in feature_names[im_type]:
                # Columns are numbered continuously across both calls.
                column = 'radVar' + str(len(columns) + 1)
                real_names.append(f"{column}:{type_name}__{feat_name}__{param_name}")
                keys = (image_space, *parent_keys, type_name, param_name, feat_name)
                columns[column] = [_lookup_feature(rad_struct, keys) for rad_struct in rad_structs]


def _lookup_feature(rad_struct, keys):
    """Follow ``keys`` through the nested structure; NaN if absent, a str or a list."""
    node = rad_struct
    try:
        for key in keys:
            node = node[key]
    except (KeyError, IndexError, TypeError):
        # Feature not present (or an intermediate level is not a mapping).
        return np.nan
    if isinstance(node, (str, list)):
        # Only scalar values are kept in the table.
        return np.nan
    return node