Source code for ecgprocess.process_dicom
"""
A module for extracting metadata, median beats, and raw waveforms from ECG
DICOM files.
This module provides an API through a reader class, which maps ECG data from
DICOM files to class attributes. These attributes can be programmatically accessed
and further processed by downstream ECGprocess modules or external programs
leveraging the API.
"""
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# imports
import pathlib
import pydicom
import numpy as np
import ecgprocess.utils.ecg_tools as ecg_utils
import ecgprocess.utils.reader_tools as reader_utils
from ecgprocess.process_xml import ECGXMLReader
from dataclasses import dataclass, field
from typing import (
Self, Any, Optional
)
from ecgprocess.errors import (
is_type,
_check_readable,
)
from ecgprocess.utils.general import(
parse_number,
)
from ecgprocess.constants import (
ProcessDicomNames as PDNames,
CoreData as Core,
)
from ecgprocess.utils.general import(
ManagedProperty,
)
from ecgprocess.utils.config_tools import(
ConfigParser,
)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
# Short aliases for the nested namespaces of `Core` (imported above as
# `ecgprocess.constants.CoreData`), so the lookups below stay compact.
# `CTypes.MetaData` is used in this module as the attribute name of the
# metadata mapping on the reader instance.
CTypes = Core.DataTypes
# `CProc.RAW` and `CProc.TAGS` are used in this module as the names of the
# managed `raw_data` / `tags` properties.
CProc = Core.ProcessingData
# NOTE(review): `CMeta` and `CLeads` are not referenced in the code visible
# here -- confirm whether they are used elsewhere before removing them.
CMeta = Core.MetaData
CLeads = Core.Leads
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
@dataclass
class ECGDICOMReader(ECGXMLReader):
    """
    Processes a DICOM file containing ECG data and extracts the metadata,
    median beats, and raw waveforms.

    Parameters
    ----------
    augment_leads : `bool`, default `False`
        Whether the augmented leads should be calculated if these are not
        already available in the source file.
    resample_500 : `bool`, default `True`
        Whether to resample the ECG to a frequency of 500 Hertz.

    Attributes
    ----------
    augment_leads : `bool`
        Whether the augmented leads were calculated if these were unavailable.
    resample_500 : `bool`
        Whether the ECG was resampled to a 500 Hertz frequency.

    Methods
    -------
    extract(config, bits, skip_empty, parse_numeric, pattern, substitute,
            character_trim, **kwargs)
        Processes the DICOM file content applying optional lead augmentation
        and resampling. The DICOM content will be mapped to class attributes.
    """
    # #### properties
    # `tags` and `raw_data` are managed properties; both are populated by
    # `__call__` through `set_with_setter`.
    tags = ManagedProperty(CProc.TAGS, list)
    raw_data = ManagedProperty(CProc.RAW, dict)
    # NOTE(review): the annotation makes this a dataclass field (and thus an
    # `__init__` keyword) -- confirm that is intended.
    _as_array:bool = True
    # #### parameters, with defaults
    augment_leads:bool = field(default=False)
    resample_500:bool = field(default=True)
    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
    def __post_init__(self) -> None:
        """
        Validate the constructor inputs.

        Runs `is_type(..., bool)` on the attributes named by
        `PDNames.AUG_LEADS` and `PDNames.RESAMPLE`.
        """
        is_type(getattr(self, PDNames.AUG_LEADS), bool)
        is_type(getattr(self, PDNames.RESAMPLE), bool)
    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
    def __call__(self, path:str | pathlib.Path, verbose:bool=False,
                 **kwargs:Optional[Any],
                 ) -> Self:
        """
        Reads a `.dcm` file containing ECG readings.

        Parameters
        ----------
        path : `str` or `pathlib.Path`
            The path to a .dcm file.
        verbose : `bool`, default `False`
            Whether warnings and process info should be printed.
        **kwargs : any
            keyword arguments passed to flatten_dict.

        Attributes
        ----------
        tags : `list` [`str`]
            A list of strings with parsed tags matching the `raw_data` keys.
        raw_data : `dict` [`str`, `any`]
            The raw parsed data.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.

        Raises
        ------
        ValueError
            If pydicom rejects the file with `InvalidDicomError`; the
            original exception is attached as the cause.

        Notes
        -----
        Input validation is delegated to `is_type` and `_check_readable`
        from `ecgprocess.errors`.
        """
        # #### check input
        is_type(path, (pathlib.PosixPath, pathlib.WindowsPath, str))
        is_type(verbose, bool)
        # #### assign to self
        self.verbose = verbose
        # #### confirm file is readable
        _check_readable(path)
        # #### read dicom file
        try:
            ds = pydicom.dcmread(path)
        except pydicom.errors.InvalidDicomError as e:
            # chain explicitly so the pydicom error is kept as the cause
            raise ValueError(
                f"Failed to read DICOM file at {path}: {e}") from e
        # #### flatten the dataset into a single-level dictionary
        dicom_dict = reader_utils.flatten_dict(
            reader_utils.dicom_to_dict(ds),
            skip_root=False,
            **kwargs,
        )
        # #### store keys and data
        # the descriptors are fetched from the class, so that their
        # `set_with_setter` method can be called for this instance
        getattr(type(self), CProc.RAW).set_with_setter(self, dicom_dict)
        getattr(type(self), CProc.TAGS).set_with_setter(
            self, list(dicom_dict))
        # #### return
        return self
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def extract(self, config:ConfigParser, bits:np.dtype | None =None,
                skip_empty:bool=True, parse_numeric:bool=True,
                pattern:dict[str, str]|None=None,
                substitute:tuple[str,str]|None=(r'_[0-9]{1,2}\.*', ' '),
                character_trim:int=0,
                **kwargs:Optional[Any]) -> Self:
        """
        Processes the raw ECG data and assigns these to class attributes
        performing resampling and lead augmentation if requested.

        Parameters
        ----------
        config : `ConfigParser`
            A class instance of a parsed configuration file, mapping the DICOM
            content to class attributes. Specifically this should include
            dictionary attributes `MetaData`, `WaveForms`, `MedianBeats`,
            `OtherData`. The `MetaData` includes some privileged keys including
            essential information to describe an ECG instance, as well as
            non-privileged information. The difference between `OtherData` and
            `MetaData` is the way it is processed by other functions or methods
            with the `OtherData` processed without strong checks on its content.
            `WaveForms` and `MedianBeats` simply include the lead mappings.
            Please refer to the `constants.CoreData` class for the specifics.
        bits : `np.dtype`, default `None`
            np.array bits passed to numpy.array dtype.
            NOTE(review): this argument is currently neither used in this
            method nor forwarded to `ECGXMLReader.extract` -- confirm whether
            that is intentional.
        skip_empty : `bool`, default `True`
            Whether empty tags should be skipped or throw an error.
        parse_numeric : `bool`, default `True`
            Whether to check for numeric data accidentally recorded as string
            and try to parse these to int or float depending on the presence
            of a decimal separator.
        pattern : `dict` [`str`, `str`], default `NoneType`
            Use this to extract a subset of items from the raw data based on
            the pattern key, and adds a unique name as a prefix to the keys of
            the selected subset. The unique name will be based on the value
            from the key which matches the pattern value. The selected items
            are merged into `MetaData`.
        substitute : `tuple` [`str`,`str`] or `None`, default `(r"_[0-9]{1,2}\\.*", " ")`
            A tuple containing a regular expression pattern and replacement
            string. This substitution is applied to the remaining portion of
            the `data` key after removing the matching prefix.
        character_trim : `int`, default `0`
            The number of characters which should be removed from the
            right-hand side of the `data` key which did not match the
            `pattern` key.
        **kwargs
            The keyword arguments for reader_tools.get_ecg_data.
            For the waveforms and medianbeats as_array and bits are hard coded
            so these will raise an error if supplied as kwargs.

        Attributes
        ----------
        MetaData : `dict` [`str`, `any`]
            ECG metadata.
        Waveforms : `dict` [`str`, `np.array`]
            The lead specific ECG waveforms.
        MedianBeats : `dict` [`str`, `np.array`]
            The lead specific ECG median beats.
        OtherData : `dict` [`str`, `any`]
            Other data.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.
        """
        # #### run ECGXMLReader.extract
        super().extract(config=config, skip_empty=skip_empty,
                        parse_numeric=parse_numeric, **kwargs)
        # #### nothing more to do without a string pattern
        if pattern is None:
            return self
        # #### extract based on a string pattern
        subset = reader_utils.subset_dict(
            data=getattr(self, CProc.RAW), pattern=pattern,
            substitute=substitute, verbose=self.verbose,
            skip_empty=skip_empty,
            character_trim=character_trim)
        # make sure everything is a string, then optionally map to numeric
        new_meta = {k:str(v) for k, v in subset.items()}
        if parse_numeric:
            # only values of existing keys are replaced, so the dictionary
            # size does not change while iterating
            for key, text in new_meta.items():
                try:
                    parsed = parse_number(text)
                except ValueError:
                    # not numeric: keep the string representation
                    continue
                # unwrap single-element lists to a scalar
                if isinstance(parsed, list) and len(parsed) == 1:
                    parsed = parsed[0]
                new_meta[key] = parsed
        # assign to meta
        getattr(self, CTypes.MetaData).update(new_meta)
        # #### return
        return self