Source code for ecgprocess.process_dicom

"""
A module for extracting metadata, median beats, and raw waveforms from ECG
DICOM files.

This module provides an API through a reader class, which maps ECG data from
DICOM files to class attributes. These attributes can be programmatically accessed
and further processed by downstream ECGprocess modules or external programs
leveraging the API.
"""

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# imports
import pathlib
import pydicom
import numpy as np
import ecgprocess.utils.ecg_tools as ecg_utils
import ecgprocess.utils.reader_tools as reader_utils
from ecgprocess.process_xml import ECGXMLReader
from dataclasses import dataclass, field
from typing import (
    Self, Any, Optional
)
from ecgprocess.errors import (
    is_type,
    _check_readable,
)
from ecgprocess.utils.general import(
    parse_number,
)
from ecgprocess.constants import (
    ProcessDicomNames as PDNames,
    CoreData as Core,
)
from ecgprocess.utils.general import(
    ManagedProperty,
)
from ecgprocess.utils.config_tools import(
    ConfigParser,
)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants: module-level shorthands for attributes of the CoreData container
CTypes, CProc = Core.DataTypes, Core.ProcessingData
CMeta, CLeads = Core.MetaData, Core.Leads

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[docs]
@dataclass
class ECGDICOMReader(ECGXMLReader):
    """
    Processes a DICOM file containing ECG data and extracts the metadata,
    median beats, and raw waveforms.

    Calling an instance with a file path reads the file and stores its
    flattened content; `extract` then maps that content to class attributes.

    Parameters
    ----------
    augment_leads : `bool`, default `False`
        Whether the augmented leads should be calculated if these are not
        already available in the source file.
    resample_500 : `bool`, default `True`
        Whether to resample the ECG to a frequency of 500 Hertz.

    Attributes
    ----------
    augment_leads : `bool`
        Whether the augmented leads were calculated if these were unavailable.
    resample_500 : `bool`
        Whether the ECG was resampled to a 500 Hertz frequency.
    tags : `list` [`str`]
        The parsed tags matching the `raw_data` keys; set by `__call__`.
    raw_data : `dict` [`str`, `any`]
        The raw parsed data; set by `__call__`.

    Methods
    -------
    __call__(path, verbose, **kwargs)
        Reads the DICOM file and stores its flattened content in `raw_data`
        and `tags`.
    extract(config, skip_empty, parse_numeric, **kwargs)
        Processes the DICOM file content applying optional lead augmentation
        and resampling. The DICOM content will be mapped to class attributes.

    """
    # #### properties
    # Unannotated, so `@dataclass` does not turn these into init fields.
    # `__call__` fills them through `ManagedProperty.set_with_setter`.
    tags = ManagedProperty(CProc.TAGS, list)
    raw_data = ManagedProperty(CProc.RAW, dict)
    # Annotated, so `@dataclass` treats this as a field with default `True`.
    # NOTE(review): not referenced anywhere in this class; presumably read by
    # the parent `ECGXMLReader` -- confirm.
    _as_array:bool = True
    # #### parameters, with defaults
    augment_leads:bool = field(default=False)
    resample_500:bool = field(default=True)
    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
    def __post_init__(self):
        """Check that both boolean options are passed to `is_type` as `bool`."""
        # same order as the original: lead augmentation first, then resampling
        for option_name in (PDNames.AUG_LEADS, PDNames.RESAMPLE):
            is_type(getattr(self, option_name), bool)
    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

[docs]
    def __call__(self, path:str|pathlib.Path, verbose:bool=False,
                 **kwargs:Optional[Any],
                 ) -> Self:
        """
        Reads a `.dcm` file containing ECG readings.

        Parameters
        ----------
        path : `str` or `pathlib.Path`
            The path to a .dcm file.
        verbose : `bool`, default `False`
            Whether warnings and process info should be printed.
        **kwargs : any
            keyword arguments passed to flatten_dict.

        Attributes
        ----------
        tags : `list` [`str`]
            A list of strings with parsed tags matching the `raw_data` keys.
        raw_data : `dict` [`str`, `any`]
            The raw parsed data.
        verbose : `bool`
            The supplied `verbose` flag, stored for later use by `extract`.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.

        Raises
        ------
        ValueError
            If pydicom raises `InvalidDicomError` while reading the file. The
            original exception is attached as the cause.
        """
        # #### check input
        is_type(path, (pathlib.PosixPath, pathlib.WindowsPath, str))
        is_type(verbose, bool)
        # #### assign to self
        self.verbose = verbose
        # #### confirm file is readable
        _check_readable(path)
        # #### read dicom file
        try:
            ds = pydicom.dcmread(path)
        except pydicom.errors.InvalidDicomError as e:
            # chain explicitly so the pydicom error is kept as `__cause__`
            raise ValueError(
                f"Failed to read DICOM file at {path}: {e}") from e
        # map to flatten_dict
        dicom_dict = reader_utils.flatten_dict(
            reader_utils.dicom_to_dict(ds),
            skip_root=False,
            **kwargs,
        )
        # ### store keys and data
        # Fetch the ManagedProperty objects from the class so that their
        # `set_with_setter` can write the values on this instance.
        getattr(type(self), CProc.RAW).set_with_setter(self, dicom_dict)
        getattr(type(self), CProc.TAGS).set_with_setter(
            self, list(dicom_dict.keys()))
        # ### return
        return self

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[docs]
    def extract(self, config:ConfigParser, bits:np.dtype | None =None,
                skip_empty:bool=True, parse_numeric:bool=True,
                pattern:dict[str, str]|None=None,
                substitute:tuple[str,str]|None=(r'_[0-9]{1,2}\.*', ' '),
                character_trim:int=0,
                **kwargs:Optional[Any]) -> Self:
        """
        Processes the raw ECG data and assigns these to class attributes,
        performing resampling and lead augmentation if requested.

        The parent `extract` is run first. If `pattern` is supplied, a subset
        of the raw data is then selected and merged into `MetaData`.

        Parameters
        ----------
        config : `ConfigParser`
            A class instance of a parsed configuration file, mapping the DICOM
            content to class attributes. Specifically this should include
            dictionary attributes `MetaData`, `WaveForms`, `MedianBeats`,
            `OtherData`. The `MetaData` includes some privileged keys including
            essential information to describe an ECG instance, as well as
            non-privileged information. The difference between `OtherData` and
            `MetaData` is the way it is processed by other functions or methods
            with the `OtherData` processed without strong checks on its content.
            `WaveForms` and `MedianBeats` simply include the lead mappings.
            Please refer to the `constants.CoreData` class for the specifics.
        bits : `np.dtype`, default `None`
            np.array bits passed to numpy.array dtype.
            NOTE(review): this argument is accepted but not referenced in this
            method's body and is not forwarded to the parent `extract` --
            confirm whether that is intended.
        skip_empty : `bool`, default `True`
            Whether empty tags should be skipped or throw an error.
        parse_numeric : `bool`, default `True`
            Whether to check for numeric data accidentally recorded as string and
            try to parse these to int or float depending on the presence of a
            decimal separator.
        pattern : `dict` [`str`, `str`] or `None`, default `None`
            Use this to extract a subset of items from `MetaData` based on the
            pattern key, and add a unique name as a prefix to the keys of the
            selected subset. The unique name will be based on the value from the
            key which matches the pattern value.
        substitute : `tuple` [`str`,`str`] or `None`, default `(r"_[0-9]{1,2}\\.*", " ")`
            A tuple containing a regular expression pattern and replacement string.
            This substitution is applied to the remaining portion of the `data` key
            after removing the matching prefix.
        character_trim : `int`, default `0`
            The number of characters which should be removed from the right-hand
            side of the `data` key which did not match the `pattern` key.
        **kwargs
            The keyword arguments for reader_tools.get_ecg_data.
            For the waveforms and medianbeats as_array and bits are hard coded
            so these will raise an error if supplied as kwargs.

        Attributes
        ----------
        MetaData : `dict` [`str`, `any`]
            ECG metadata.
        Waveforms : `dict` [`str`, `np.array`]
            The lead specific ECG waveforms.
        MedianBeats : `dict` [`str`, `np.array`]
            The lead specific ECG median beats.
        OtherData : `dict` [`str`, `any`]
            Other data.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.
        """
        # #### run ECGXMLReader.extract
        super().extract(config=config, skip_empty=skip_empty,
                        parse_numeric=parse_numeric, **kwargs)
        # #### without a pattern there is nothing to add to the metadata
        if pattern is None:
            return self
        # #### extract based on a string pattern
        subset = reader_utils.subset_dict(
            data=getattr(self, CProc.RAW), pattern=pattern,
            substitute=substitute, verbose=self.verbose,
            skip_empty=skip_empty,
            character_trim=character_trim)
        # make sure everything is a string, then perform the optional mapping
        # to numeric
        new_meta = {}
        for key, value in subset.items():
            text = str(value)
            new_meta[key] = text
            if not parse_numeric:
                continue
            try:
                parsed = parse_number(text)
            except ValueError:
                # not parsable as a number: keep the string form stored above
                continue
            # a single-element list is reduced to its only element
            if isinstance(parsed, list) and len(parsed) == 1:
                parsed = parsed[0]
            new_meta[key] = parsed
        # assign to meta
        getattr(self, CTypes.MetaData).update(new_meta)
        # #### return
        return self