Source code for ecgprocess.process_dicom
'''
A module for extracting metadata, median beats, and raw waveforms from ECG
DICOM files.
This module provides an API through a reader class, which maps ECG data from
DICOM files to class attributes. These attributes can be programmatically accessed
and further processed by downstream ECGprocess modules or external programs
leveraging the API.
'''
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# imports
import pathlib
import pydicom
import warnings
import numpy as np
import pandas as pd
import ecgprocess.utils.ecg_tools as ecg_utils
import ecgprocess.utils.reader_tools as reader_utils
from ecgprocess.process_xml import ECGXMLReader
from dataclasses import dataclass, field
from typing import (
Self, Dict, Any, Optional
)
from ecgprocess.errors import (
is_type,
Error_MSG,
Warn_MSG,
_check_readable,
MissingTagError,
)
from ecgprocess.utils.general import(
parse_number,
string_concat,
)
from ecgprocess.constants import (
ProcessDicomNames as PDNames,
FixedReaderNames as FRNames,
CoreData as Core,
DICOMTags,
)
from ecgprocess.utils.reader_tools import(
BaseReader,
)
from ecgprocess.utils.general import(
ManagedProperty,
)
from ecgprocess.utils.config_tools import(
ConfigParser,
)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
# Short module-level aliases for the nested constant namespaces of `Core`.
CTypes, CProc, CMeta, CLeads = (
    Core.DataTypes,
    Core.ProcessingData,
    Core.MetaData,
    Core.Leads,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
@dataclass
class ECGDICOMReader(ECGXMLReader):
    """
    Processes a DICOM file containing ECG data and extracts the metadata,
    median beats, and raw waveforms.

    Parameters
    ----------
    augment_leads : `bool`, default `False`
        Whether the augmented leads should be calculated if these are not
        already available in the source file.
    resample_500 : `bool`, default `True`
        Whether to resample the ECG to a frequency of 500 Hertz.

    Attributes
    ----------
    augment_leads : `bool`
        Whether the augmented leads were calculated if these were unavailable.
    resample_500 : `bool`
        Whether the ECG was resampled to a 500 Hertz frequency.
    tags : `list` [`str`]
        The flattened DICOM keys, populated by calling the instance on a file.
    raw_data : `dict` [`str`, `any`]
        The flattened DICOM content, populated by calling the instance on a
        file.

    Methods
    -------
    extract(config, bits, skip_empty, parse_numeric, pattern, substitute,
            character_trim, **kwargs)
        Processes the DICOM file content applying optional lead augmentation
        and resampling. The DICOM content will be mapped to class attributes.
    """
    # #### properties
    tags = ManagedProperty(CProc.TAGS, list)
    raw_data = ManagedProperty(CProc.RAW, dict)
    _as_array:bool = True
    # #### parameters, with defaults
    augment_leads:bool = field(default=False)
    resample_500:bool = field(default=True)

    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
    def __post_init__(self):
        """
        Validate that the attributes named by `PDNames.AUG_LEADS` and
        `PDNames.RESAMPLE` are booleans.
        """
        is_type(getattr(self, PDNames.AUG_LEADS), bool)
        is_type(getattr(self, PDNames.RESAMPLE), bool)

    # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
    def __call__(self, path:str, verbose:bool=False,
                 **kwargs:Optional[Any],
                 ) -> Self:
        """
        Reads a `.dcm` file containing ECG readings.

        Parameters
        ----------
        path : `str`
            The path to a .dcm file.
        verbose : `bool`, default `False`
            Whether warnings and process info should be printed.
        **kwargs : any
            keyword arguments passed to flatten_dict.

        Attributes
        ----------
        tags : `list` [`str`]
            A list of strings with parsed tags matching the `raw_data` keys.
        raw_data : `dict` [`str`, `any`]
            The raw parsed data.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.

        Raises
        ------
        ValueError
            If pydicom rejects the file with an `InvalidDicomError`; the
            original exception is attached as the cause.
        """
        # #### check input
        is_type(path, (pathlib.PosixPath, pathlib.WindowsPath, str))
        is_type(verbose, bool)
        # #### assign to self
        self.verbose = verbose
        # #### confirm file is readable
        _check_readable(path)
        # #### read dicom file
        try:
            ds = pydicom.dcmread(path)
        except pydicom.errors.InvalidDicomError as e:
            # chain the cause so the pydicom traceback is not lost
            raise ValueError(
                f"Failed to read DICOM file at {path}: {e}") from e
        # map to flatten_dict
        dicom_dict = reader_utils.flatten_dict(
            reader_utils.dicom_to_dict(ds),
            skip_root=False,
            **kwargs,
        )
        # ### store keys and data
        # The setters are reached through the class-level descriptors.
        getattr(type(self), CProc.RAW).set_with_setter(self, dicom_dict)
        getattr(type(self), CProc.TAGS).set_with_setter(
            self, list(dicom_dict.keys()))
        # ### return
        return self

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def extract(self, config:ConfigParser, bits:np.dtype | None =None,
                skip_empty:bool=True, parse_numeric:bool=True,
                pattern:dict[str, str]|None=None,
                substitute:tuple[str,str]|None=(r'_[0-9]{1,2}\.*', ' '),
                character_trim:int=0,
                **kwargs:Optional[Any]) -> Self:
        """
        Processes the raw ECG data and assigns these to class attributes
        performing resampling and lead augmentation if requested.

        Parameters
        ----------
        config : `ConfigParser`
            A class instance of a parsed configuration file, mapping the DICOM
            content to class attributes. Specifically this should include
            dictionary attributes `MetaData`, `WaveForms`, `MedianBeats`,
            `OtherData`. The `MetaData` includes some privileged keys including
            essential information to describe an ECG instance, as well as
            non-privileged information. The difference between `OtherData` and
            `MetaData` is the way it is processed by other functions or methods
            with the `OtherData` processed without strong checks on its content.
            `WaveForms` and `MedianBeats` simply include the lead mappings.
            Please refer to the `constants.CoreData` class for the specifics.
        bits : `np.dtype`, default `None`
            np.array bits passed to numpy.array dtype.
            NOTE(review): this argument is neither used in the body of this
            method nor forwarded to the parent `extract` - confirm whether
            it should be.
        skip_empty : `bool`, default `True`
            Whether empty tags should be skipped or throw an error.
        parse_numeric : `bool`, default `True`
            Whether to check for numeric data accidentally recorded as string
            and try to parse these to int or float depending on the presence
            of a decimal separator.
        pattern : `dict` [`str`, `str`], default `NoneType`
            Use this to extract a subset of items from the raw data based on
            the pattern key, and adds a unique name as a prefix to the keys of
            the selected subset. The unique name will be based on the value
            from the key which matches the pattern value. The selected items
            are merged into `MetaData`.
        substitute : `tuple` [`str`,`str`] or `None`, default `(r"_[0-9]{1,2}\\.*", " ")`
            A tuple containing a regular expression pattern and replacement
            string. This substitution is applied to the remaining portion of
            the `data` key after removing the matching prefix.
        character_trim : `int`, default `0`
            The number of characters which should be removed from the
            right-hand side of the `data` key which did not match the
            `pattern` key.
        **kwargs
            The keyword arguments for reader_tools.get_ecg_data.
            For the waveforms and medianbeats as_array and bits are hard coded
            so these will raise an error if supplied as kwargs.

        Attributes
        ----------
        MetaData : `dict` [`str`, `any`]
            ECG metadata.
        Waveforms : `dict` [`str`, `np.array`]
            The lead specific ECG waveforms.
        MedianBeats : `dict` [`str`, `np.array`]
            The lead specific ECG median beats.
        OtherData : `dict` [`str`, `any`]
            Other data.

        Returns
        -------
        self : `ECGDICOMReader` instance
            Returns the class instance with updated attributes including the
            extracted DICOM data.
        """
        # #### run ECGXMLReader.extract
        super().extract(config=config, skip_empty=skip_empty,
                        parse_numeric=parse_numeric, **kwargs)
        # #### extract based on a string pattern
        if pattern is not None:
            new_meta = reader_utils.subset_dict(
                data=getattr(self, CProc.RAW), pattern=pattern,
                substitute=substitute, verbose=self.verbose,
                skip_empty=skip_empty,
                character_trim=character_trim)
            # cast everything to string first, then optionally try to map
            # the strings back to numeric values
            new_meta = {k: str(v) for k, v in new_meta.items()}
            if parse_numeric:
                # only existing keys are re-assigned, so the dict size does
                # not change while iterating
                for k, v in new_meta.items():
                    try:
                        new_meta[k] = parse_number(v)
                        # unwrap single-element lists
                        if isinstance(new_meta[k], list):
                            if len(new_meta[k]) == 1:
                                new_meta[k] = new_meta[k][0]
                    except ValueError:
                        # not numeric, keep the string
                        new_meta[k] = v
            # assign to meta
            getattr(self, CTypes.MetaData).update(new_meta)
        # #### return
        return self
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
@dataclass
class FixedDICOMReader(BaseReader):
    """
    Takes an ECG DICOM file and extracts metadata, median beats (if available)
    and raw waveforms.

    Parameters
    ----------
    augment_leads : `bool`, default `False`
        Whether the augmented leads should be calculated when these are not
        available in the DICOM.
    resample_500 : `bool`, default `True`
        Whether to resample the ECG to a frequency of 500 Hertz.
    retain_raw : `bool`, default `False`
        Whether the raw pydicom instance and raw waveforms should be retained.
        Set to `False` to decrease memory usage. Set to `True` to explore the
        original pydicom instance. For example, use this on a few files to
        identify non-standard information to extract.

    Attributes
    ----------
    augment_leads : `bool`
        Whether the augmented leads were calculated if these were unavailable.
    resample_500 : `bool`
        Whether the ECG was resampled to a 500 Hertz frequency.
    retain_raw : `bool`
        Whether the raw pydicom data was retained.
    METADATA : `dict` [`str`, `str`]
        A dictionary describing the metadata one wants to extract from a
        DICOM. The dictionary keys represents the `target` (new) name and the
        dictionary values the `source` (old) names.
    WAVE_FORMS : `dict` [`str`, `str`]
        Target to source mapping of the waveform metadata to extract.
    MEDIAN_BEATS : `dict` [`str`, `str`]
        Target to source mapping of the median beat metadata to extract.
    ECG_TRAIT_DICT : `dict` [`str`, `list[str]`]
        A dictionary with the keys reflecting the desired name of the ECG trait.
        Each key will have a list of strings as a value. These strings will be
        compared to the names in `WaveformAnnotationSequence` attribute.
        Matching is done without case-sensitivity. If for any key there are
        multiple matching strings the algorithm will check if the extracted
        values are all the same, if not multiple entries will be returned for
        the user to decide what to do next. The extracted ECG traits will be
        included with the extracted METADATA.

    Methods
    -------
    get_metadata(path, dicom_instance, skip_empty)
        Extract the dicom metadata.
    get_waveforms(path, dicom_instance, skip_empty)
        Extract the waveform metadata and the lead voltages.
    get_median_beats(path, dicom_instance, skip_empty)
        Extract the median beat metadata and the lead voltages.

    Notes
    -----
    While the type of information that is extracted by this class is relatively
    extensive and can be expanded through for example `METADATA`, the
    pydicom attributes which these data can be extracted from are fixed. This
    therefore provides a less flexible solution than `ECGDICOMReader`.
    """
    # NOTE: this statement lives in the class body, so it runs once when the
    # class is created (i.e. on import of the module), not per instance.
    warnings.warn(
        "'FixedDICOMReader' will be deprecated in a future version. "
        "Please use 'ECGDICOMReader' instead.",
        DeprecationWarning,
        stacklevel=2
    )
    # #### parameters, with defaults
    augment_leads: bool=False
    resample_500:bool=True
    retain_raw:bool=False
    # #### check input
    # NOTE: these calls also run in the class body and therefore only see
    # the default values assigned directly above, not per-instance values.
    is_type(augment_leads, bool, 'augment_leads')
    is_type(resample_500, bool, 'resample_500')
    # #### default tags
    # default_factory builds a fresh DICOMTags() for every new instance
    METADATA:dict = field(default_factory=lambda: DICOMTags().METADATA)
    WAVE_FORMS:dict = field(default_factory=lambda: DICOMTags().WAVE_FORMS)
    MEDIAN_BEATS:dict = field(default_factory=lambda: DICOMTags().MEDIAN_BEATS)
    ECG_TRAIT_DICT:dict =\
        field(default_factory=lambda: DICOMTags().ECG_INTERPERTATION_DICT)
    # #### Error MSG
    # Un-annotated, so a plain class attribute rather than a dataclass field;
    # the double underscore name-mangles it to `_FixedDICOMReader__MSG1`.
    __MSG1=Error_MSG.MISSING_PATH_INSTANCE.format('dicom_instance')
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[docs]
def __call__(self, path:str, skip_empty:bool=True, verbose:bool=False,
             ) -> Self:
    """
    Read a `.dcm` DICOM file and extracts metadata, raw waveforms, and
    median beats.

    See `constants.DICOMTags` for the `METADATA`, `WAVE_FORMS`, and
    `MEDIAN_BEATS` tags looked for.

    Parameters
    ----------
    path : `str`
        The path to the .dcm file.
    skip_empty : `bool`, default `True`
        Whether empty tags should be skipped or throw an error.
    verbose : `bool`, default `False`
        Prints missing tags if skip_empty is set to `True`.

    Attributes
    ----------
    The attribute named by `CTypes.MetaData` : `dict` [`str`, `any`]
        The combined metadata, waveform metadata, median beat metadata,
        duration, new sampling frequency and ECG measurements.
    The attribute named by `CTypes.WaveForms` : `dict` [`str`, `np.array`]
        The lead specific ECG waveforms (set by `get_waveforms`, replaced
        here when resampled).
    The attribute named by `CTypes.MedianBeats` : `dict` [`str`, `np.array`]
        The lead specific ECG median beats (set by `get_median_beats`,
        replaced here when resampled).
    The attribute named by `FRNames.ORIG_DCMREAD_INST`
        The pydicom dataset, only set when `retain_raw` is true.

    Returns
    -------
    self : `FixedDICOMReader` instance
        Returns the class instance with updated attributes extracted
        from `dcmread`.

    Raises
    ------
    KeyError
        If resampling is requested while the sampling frequency is absent,
        `None` or NaN.
    """
    is_type(path, (pathlib.PosixPath, pathlib.WindowsPath, str), 'path')
    is_type(skip_empty, bool, 'skip_empty')
    is_type(verbose, bool, 'verbose')
    self.verbose = verbose
    # confirm file is readable
    _check_readable(path)
    # #### Read DICOM
    ECG, results_dict, empty_metadata = self.get_metadata(
        path, skip_empty=skip_empty)
    # #### Extract waveforms
    # NOTE: the second return value is the waveform *metadata*; the lead
    # voltages themselves are stored on the instance by `get_waveforms`.
    _, wave_dict, empty_wave_forms = self.get_waveforms(
        dicom_instance=ECG, skip_empty=skip_empty)
    results_dict.update(wave_dict)
    # #### Extract Median beats (same convention as for the waveforms)
    _, median_dict, empty_median_beats = self.get_median_beats(
        dicom_instance=ECG, skip_empty=skip_empty)
    results_dict.update(median_dict)
    # #### add duration,
    try:
        results_dict[CProc.Duration] = (
            results_dict[CMeta.SN_W] /
            results_dict[CMeta.SF]
        )
    except TypeError:
        results_dict[CProc.Duration] = None
    duration = results_dict[CProc.Duration]
    # #### do we need to resample
    results_dict[CProc.SF_NEW] = None
    if self.resample_500:
        # confirm SF is present; a tag skipped by `get_metadata` or
        # `get_waveforms` is stored as NaN, so treat that as missing too
        # instead of failing later on `int(nan)`.
        sampling_freq = results_dict.get(CMeta.SF)
        if sampling_freq is None or (
                isinstance(sampling_freq, float) and np.isnan(sampling_freq)):
            raise KeyError(f'`{CMeta.SF}` is necessary to resample '
                           'the ECG signal.')
        # if it is, check whether there is a need to resample the signals.
        if int(sampling_freq) != 500 and duration is not None:
            # resample the lead voltages stored on the instance (not the
            # metadata dictionaries returned by the getters)
            waveforms = getattr(self, CTypes.WaveForms)
            if waveforms is not None:
                setattr(self, CTypes.WaveForms,
                        ecg_utils.resampling_500hz(
                            waveforms, duration=duration))
            median_beats = getattr(self, CTypes.MedianBeats)
            if median_beats is not None:
                setattr(self, CTypes.MedianBeats,
                        ecg_utils.resampling_500hz(
                            median_beats, duration=duration,
                            median=True))
            results_dict[CProc.SF_NEW] = 500
    # #### Extract standard ECG measurements
    _, ecg_traits, missing_ecg_traits = self._get_waveform_annotation(
        dicom_instance=ECG, skip_empty=skip_empty)
    results_dict.update(ecg_traits)
    empty_metadata = empty_metadata + missing_ecg_traits
    # #### end of extractions, optionally printing tags which were missing
    if verbose:
        missing_tags = empty_metadata + empty_wave_forms + empty_median_beats
        if len(missing_tags) > 0:
            warnings.warn(
                'The following DICOM tags could not be found: {}.'.format(
                    missing_tags))
    # #### assign results_dict
    setattr(self, CTypes.MetaData, results_dict)
    # add the original dcmread instance
    if self.retain_raw:
        setattr(self, FRNames.ORIG_DCMREAD_INST, ECG)
    # #### return stuff
    return self
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[docs]
def get_metadata(self, path:str|None=None,
                 dicom_instance:pydicom.dataset.FileDataset|None=None,
                 skip_empty:bool=True
                 ) -> tuple[pydicom.dataset.FileDataset, dict[str, Any],
                            list[str]]:
    '''
    Collect the attributes listed in `self.METADATA` from a DICOM dataset.

    Parameters
    ----------
    path : `str`, default `NoneType`.
        The path to the .dcm file.
    dicom_instance : `pydicom.dataset.FileDataset`, default `NoneType`.
        An already loaded dataset.
    skip_empty : `bool`, default `True`
        If `True` an absent attribute is recorded as NaN, otherwise a
        `MissingTagError` is raised.

    Returns
    -------
    tuple
        - The dataset that was read (or the supplied `dicom_instance`).
        - A dictionary with the extracted metadata, keyed on the
          `METADATA` keys.
        - A list with the `METADATA` values (source names) which were not
          found on the dataset.

    Raises
    ------
    ValueError
        If both `path` and `dicom_instance` are supplied.
    MissingTagError
        If an attribute is absent and `skip_empty` is `False`.

    Notes
    -----
    Either supply a path to a dicom file or a dataset instance.
    '''
    # #### validate the arguments
    allowed_path = (type(None), pathlib.PosixPath, pathlib.WindowsPath, str)
    is_type(path, allowed_path)
    is_type(dicom_instance, (type(None), pydicom.dataset.FileDataset))
    is_type(skip_empty, bool)
    if path is not None and dicom_instance is not None:
        raise ValueError(self.__MSG1)
    # #### obtain the dataset
    if path is None:
        dataset = dicom_instance
    else:
        with open(path, 'rb') as handle:
            dataset = pydicom.dcmread(handle)
    # #### walk over the requested attributes
    metadata = {}
    not_found = []
    for target, source in self.METADATA.items():
        if hasattr(dataset, source):
            metadata[target] = getattr(dataset, source)
            continue
        # skip_empty was validated as a bool above
        if not skip_empty:
            raise MissingTagError(source)
        # record the gap and remember which source name was absent
        metadata[target] = np.nan
        not_found.append(source)
    return dataset, metadata, not_found
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[docs]
def get_waveforms(self, path:str|None=None,
                  dicom_instance: pydicom.dataset.FileDataset|None=None,
                  skip_empty:bool=True
                  ) -> tuple[pydicom.dataset.FileDataset,
                             dict[str, Any], list[str]]:
    '''
    Takes a dicom file and extracts the waveforms and waveform metadata.

    The lead voltages are not returned; they are stored on the instance
    under the attribute named by `CTypes.WaveForms`.

    Parameters
    ----------
    path : `str`, default `NoneType`.
        The path to the .dcm file.
    dicom_instance : `pydicom.dataset.FileDataset`, default `NoneType`.
        An already loaded dataset.
    skip_empty : `bool`, default `True`
        If `True` an absent attribute is recorded as NaN, otherwise a
        `MissingTagError` is raised.

    Returns
    -------
    tuple
        - The dataset that was read (or the supplied `dicom_instance`).
        - A dictionary with the extracted waveform metadata, including the
          lead unit under `WaveformUnits`.
        - A list with the `WAVE_FORMS` values (source names) which were not
          found.

    Raises
    ------
    ValueError
        If both `path` and `dicom_instance` are supplied.
    AttributeError
        If the first waveform array cannot be obtained from the dataset.
    MissingTagError
        If an attribute is absent and `skip_empty` is `False`.

    Notes
    -----
    Either supply a path to a dicom file or a dataset instance.
    '''
    # #### check input
    is_type(path, (type(None), pathlib.PosixPath, pathlib.WindowsPath, str))
    is_type(dicom_instance, (type(None), pydicom.dataset.FileDataset))
    is_type(skip_empty, bool)
    if (dicom_instance is not None) and (path is not None):
        raise ValueError(self.__MSG1)
    # #### constants
    LEAD_UNITS = 'WaveformUnits'
    WAVE_FORM_ARR = 'waveform_array'
    # #### Read DICOM
    # NOTE `with` closes automatically if an error is raised
    if path is not None:
        with open(path, 'rb') as dicom:
            # `dcmread` is only reachable through the pydicom namespace
            ECG = pydicom.dcmread(dicom)
    else:
        ECG = dicom_instance
    # #### extract waveforms (index 0), transposed so iteration yields leads
    try:
        WAVE_TEMP = getattr(ECG, WAVE_FORM_ARR)(0).T
    except Exception as err:
        raise AttributeError(
            Error_MSG.MISSING_DICOM.format(WAVE_FORM_ARR)) from err
    WAVE = getattr(ECG, FRNames.WAVE_FORM_SEQ)[0]
    channel_seq = getattr(WAVE, FRNames.CHANNEL_DEF_SEQ)
    SETTINGS = channel_seq[0]
    # #### Extract waveform metadata
    temp_results_dict = {}
    empty_wave_forms = []
    for t, s in self.WAVE_FORMS.items():
        # look on the waveform item first, then on its first channel
        if hasattr(WAVE, s):
            temp_results_dict[t] = getattr(WAVE, s)
        elif hasattr(SETTINGS, s):
            temp_results_dict[t] = getattr(SETTINGS, s)
        elif not skip_empty:
            # Should an Error be returned
            raise MissingTagError(s)
        else:
            # assign NA and append missing metadata
            temp_results_dict[t] = np.nan
            empty_wave_forms.append(s)
    # #### Add the lead strings
    lead_info_waveform, lead_units = self._get_lead_info(channel_seq)
    temp_results_dict[LEAD_UNITS] = lead_units
    # map each row of the transposed array onto its lead name
    WAVE_LEADS = {
        lead_info_waveform[k]: lead for k, lead in enumerate(WAVE_TEMP)
    }
    del WAVE_TEMP
    # #### do we want to extract the augmented leads
    if getattr(self, PDNames.AUG_LEADS) == True and\
            len(WAVE_LEADS) < 12:
        WAVE_LEADS = ecg_utils.get_limb_leads(WAVE_LEADS)
    # #### set the processed WAVE FORM data
    setattr(self, CTypes.WaveForms, WAVE_LEADS)
    # return
    return ECG, temp_results_dict, empty_wave_forms
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[docs]
def get_median_beats(self, path:str|None=None,
                     dicom_instance: pydicom.dataset.FileDataset|None=None,
                     skip_empty:bool=True
                     ) -> tuple[pydicom.dataset.FileDataset,
                                dict[str, Any], list[str]]:
    '''
    Takes a dicom file and extracts the median beats and its metadata.

    The lead voltages are not returned; they are stored on the instance
    under the attribute named by `CTypes.MedianBeats` (`None` when the
    median beats are unavailable and `skip_empty` is `True`).

    Parameters
    ----------
    path : `str`, default `NoneType`.
        The path to the .dcm file.
    dicom_instance : `pydicom.dataset.FileDataset`, default `NoneType`.
        An already loaded dataset.
    skip_empty : `bool`, default `True`
        If `True` absent data are recorded as `None`, otherwise an error is
        raised.

    Returns
    -------
    tuple
        - The dataset that was read (or the supplied `dicom_instance`).
        - A dictionary with the extracted median beat metadata, keyed on
          the `MEDIAN_BEATS` keys (pre-populated with `None`).
        - A list with the `MEDIAN_BEATS` keys which were not found.

    Raises
    ------
    ValueError
        If both `path` and `dicom_instance` are supplied.
    AttributeError
        If the second waveform array cannot be obtained and `skip_empty`
        is `False`.
    MissingTagError
        If an attribute is absent and `skip_empty` is `False`.

    Notes
    -----
    Either supply a path to a dicom file or a dataset instance.
    '''
    # #### check input
    is_type(path, (type(None), pathlib.PosixPath, pathlib.WindowsPath, str))
    is_type(dicom_instance, (type(None), pydicom.dataset.FileDataset))
    is_type(skip_empty, bool)
    if (dicom_instance is not None) and (path is not None):
        raise ValueError(self.__MSG1)
    # #### constants
    WAVE_FORM_ARR = 'waveform_array'
    LEAD_UNITS2 = 'MedianWaveformUnits'
    # #### Read DICOM
    # NOTE `with` closes automatically if an error is raised
    if path is not None:
        with open(path, 'rb') as dicom:
            # reads standard dicom content
            ECG = pydicom.dcmread(dicom)
    else:
        ECG = dicom_instance
    # ##### extract the median beats data
    # start from "everything missing" and tick entries off once found
    temp_results_dict = {k: None for k in self.MEDIAN_BEATS}
    empty_median_beats = list(self.MEDIAN_BEATS.keys())
    # the median beats should be index 1 (starting at 0)
    try:
        TEMP_MEDIAN = getattr(ECG, WAVE_FORM_ARR)(1).T
    except Exception as err:
        if not skip_empty:
            raise AttributeError(
                Error_MSG.MISSING_DICOM.format(WAVE_FORM_ARR)) from err
        # no median beats available: flag this on the instance and return
        # the pre-populated placeholders
        setattr(self, CTypes.MedianBeats, None)
        return ECG, temp_results_dict, empty_median_beats
    # ##### getting WaveformSequence and ChannelDefinitionSequence attributes
    WAVE_M = getattr(ECG, FRNames.WAVE_FORM_SEQ)[1]
    # read the channel definitions of the median item itself (index 1),
    # so the lead names and units describe the median beats
    channel_seq_median = getattr(WAVE_M, FRNames.CHANNEL_DEF_SEQ)
    # NOTE(review): `get_waveforms` takes its settings from channel 0,
    # whereas channel 1 is used here - confirm this is intended.
    SETTINGS_M = channel_seq_median[1]
    for t, s in self.MEDIAN_BEATS.items():
        # if present in WAVE or SETTINGS assign
        if hasattr(WAVE_M, s):
            temp_results_dict[t] = getattr(WAVE_M, s)
            empty_median_beats.remove(t)
        elif hasattr(SETTINGS_M, s):
            temp_results_dict[t] = getattr(SETTINGS_M, s)
            empty_median_beats.remove(t)
        elif not skip_empty:
            # Should an Error be returned
            raise MissingTagError(s)
        # otherwise keep the pre-populated `None` / missing entry
    # #### Add the lead strings
    lead_info_waveform, lead_units2 = self._get_lead_info(channel_seq_median)
    temp_results_dict[LEAD_UNITS2] = lead_units2
    # map each row of the transposed array onto its lead name
    MEDIAN_LEAD = {
        lead_info_waveform[k]: lead for k, lead in enumerate(TEMP_MEDIAN)
    }
    del TEMP_MEDIAN
    # #### do we want to extract the augmented leads
    if getattr(self, PDNames.AUG_LEADS) == True and\
            len(MEDIAN_LEAD) < 12:
        MEDIAN_LEAD = ecg_utils.get_limb_leads(MEDIAN_LEAD)
    # #### set median beats
    setattr(self, CTypes.MedianBeats, MEDIAN_LEAD)
    # #### return
    return ECG, temp_results_dict, empty_median_beats
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
def _get_waveform_annotation(
self, path:str|None=None,
dicom_instance: pydicom.dataset.FileDataset|None=None,
skip_empty:bool=True,) -> tuple[pydicom.dataset.FileDataset,
dict[str, Any], list[str]]:
'''
Extract ECG measurements, free text and pacemaker spike positions from
the `WaveformAnnotationSequence` attribute of a dicom file.
Parameters
----------
path : `str`, default `NoneType`.
The path to the .dcm file.
dicom_instance : `pydicom.dataset.FileDataset`, default `NoneType`.
An already loaded dataset.
skip_empty : `bool`, default `True`
If `False` a `ValueError` is raised when any entry of the returned
dictionary is still missing.
Returns
-------
results : tuple
- The dataset that was read (or the supplied `dicom_instance`).
- A dictionary with, for every `ECG_TRAIT_DICT` key, the value and its
unit (key suffixed with ` UNIT`), plus the entries
`UnformattedTextValue` and `Pacemaker Spike`.
- A list of the dictionary keys whose value is missing according to
`pd.isna`.
Raises
------
ValueError
If both `path` and `dicom_instance` are supplied, or if `skip_empty`
is `False` and one or more entries are missing.
Notes
-----
Either supply a path to a dicom file or a dataset instance.
'''
# #### check input
is_type(path, (type(None), pathlib.PosixPath, pathlib.WindowsPath, str))
is_type(dicom_instance, (type(None), pydicom.dataset.FileDataset))
is_type(skip_empty, bool)
# default_results_dict is what gets returned; temp_results_dict and
# temp_unit_dict collect the raw matches, keyed on lower-cased names.
default_results_dict = {}
temp_results_dict = {}
temp_unit_dict = {}
if (not dicom_instance is None) and (not path is None):
raise ValueError(self.__MSG1)
# #### constants
ECG_UNIT_STRING = ' UNIT'
PACEMAKER_SPIKE = 'Pacemaker Spike'
ECG_INTERPERTATION = 'WaveformAnnotationSequence'
ECG_CONCEPTNAME = 'ConceptNameCodeSequence'
CODE_MEANING = 'CodeMeaning'
ECG_UNIT = 'MeasurementUnitsCodeSequence'
ECG_TRAIT_VALUE = 'NumericValue'
REFERENCED_POS = 'ReferencedSamplePositions'
FREE_TEXT = 'UnformattedTextValue'
# #### Read DICOM
# NOTE `with` closes automatically if an error is raised
if not path is None:
with open(path, 'rb') as dicom:
# reads standard dicom content
ECG=pydicom.dcmread(dicom)
else:
ECG=dicom_instance
# first set everything to NA (the free text defaults to None instead)
for e in self.ECG_TRAIT_DICT:
default_results_dict[e] = np.nan
default_results_dict[e+ ECG_UNIT_STRING] = np.nan
default_results_dict[FREE_TEXT] = None
default_results_dict[PACEMAKER_SPIKE]=np.nan
# next see if we can extract some interpretations.
if hasattr(ECG, ECG_INTERPERTATION):
free_text = ''
# flatten the candidate names (the dict values are lists of strings)
# into a single lower-cased list
ECG_CMPR_LWR = [it.lower() for sl\
in self.ECG_TRAIT_DICT.values() for it in sl]
for w in getattr(ECG, ECG_INTERPERTATION):
# # set annot_count to -1 if not there
# # ANNOT GROUP of > 0 represents annotations (1), arrythmia
# # markers (2), or pacemaker spikes (3).
# # group 0 will be the free text.
# annot_count = getattr(w, PDNames.ANNOTATION_GROUP, -1)
# if hasattr(w, PDNames.ECG_CONCEPTNAME) and annot_count > 0:
if hasattr(w, ECG_CONCEPTNAME):
# only the first concept name item is inspected
ecg_int = getattr(w, ECG_CONCEPTNAME)[0]
try:
ecg_trait = getattr(ecg_int, CODE_MEANING)
# find matching element - using lower case again
if ecg_trait.lower() in ECG_CMPR_LWR:
# get the measurement and set to float, will
# skip float conversion if None type.
try:
temp_results_dict[ecg_trait.lower()] = float(
getattr(w, ECG_TRAIT_VALUE)
)
except TypeError:
temp_results_dict[ecg_trait.lower()] = \
getattr(w, ECG_TRAIT_VALUE)
# see if we need to warn
if self.verbose == True:
warnings.warn(Warn_MSG.NUMB_IS_NONE.\
format(ecg_trait.lower()))
# get the unit, silently skipping annotations without one
try:
temp_unit_dict[
ecg_trait.lower()+\
ECG_UNIT_STRING]=getattr(
getattr(w, ECG_UNIT)[0],
CODE_MEANING)
except (AttributeError, IndexError):
pass
# #### ancillary info
# see if there is a PaceMakerSpike; the referenced sample positions
# are concatenated onto the entry through `string_concat`
if ecg_trait.lower() == PACEMAKER_SPIKE.lower():
try:
v = default_results_dict[
PACEMAKER_SPIKE]
nv = str(getattr(w, REFERENCED_POS))
default_results_dict[
PACEMAKER_SPIKE] = string_concat(
v, nv, sep=', ')
del v, nv
except AttributeError:
pass
except AttributeError:
pass
# get free text, concatenated onto the entry with a newline separator
# if annot_count == 0:
if hasattr(w, FREE_TEXT):
try:
v = default_results_dict[FREE_TEXT]
nv = getattr(w, FREE_TEXT)
default_results_dict[FREE_TEXT] =\
string_concat(old=v, new=nv, sep='\n')
del v, nv
except AttributeError:
pass
# now assign temp_results_dict to default_results_dict dealing with
# keys with more than one matching string.
if len(temp_results_dict) > 0:
for k, idx in self.ECG_TRAIT_DICT.items():
# Check if idx has more than one entry
# extract all entries and make sure they are the
# same - NOTE using set comprehension to get the
# unique elements
unique_set = list({temp_results_dict[el.lower()] for el in\
idx if el.lower() in temp_results_dict})
if len(unique_set) == 1:
# if only one unique entry simply assign this to k
for e in idx:
try:
default_results_dict[k] =\
temp_results_dict[e.lower()]
default_results_dict[k+ECG_UNIT_STRING] =\
temp_unit_dict[
e.lower()+ECG_UNIT_STRING]
except KeyError:
# NOTE the KeyError is expected behaviour,
# some of the `e`'s will not be in the
# temp_results_dict
pass
elif len(unique_set) > 1:
# given that the results are not unique
# we will return all using the individual `idx` elements
# instead of `k` - NOTE the index without call to lower
# is intended.
for e in idx:
try:
default_results_dict[e] =\
temp_results_dict[e.lower()]
default_results_dict[e+ECG_UNIT_STRING] =\
temp_unit_dict[
e.lower()+ECG_UNIT_STRING]
except KeyError:
# same expected KeyError as in the branch above
pass
# which ECG traits are still missing (NaN or None) according to pd.isna
missing_ecg_traits =\
[k for k,v in default_results_dict.items() if pd.isna(v)]
if skip_empty == False and len(missing_ecg_traits) > 0:
# Should an Error be returned
raise ValueError('The following ECG measurments are '
'unavailable: {}'.format(missing_ecg_traits))
# return
return ECG, default_results_dict, missing_ecg_traits
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
def _get_lead_info(self, channel_seq:pydicom.dataset.FileDataset,
                   ) -> tuple[dict[int, str], str]:
    """
    Collect the lead names and the shared measurement unit from a channel
    definition sequence.

    Parameters
    ----------
    channel_seq : `pydicom.sequence.Sequence`
        The channel definitions to iterate over; each item should expose a
        `ChannelSourceSequence` and optionally a
        `ChannelSensitivityUnitsSequence`.

    Returns
    -------
    `tuple` [`dict`, `str`]:
        - leadnames : dict [`int`, `str`]
            The position of each channel mapped to its lead name (the
            `CodeMeaning` with the word `Lead` removed and whitespace
            stripped).
        - leadunit : str
            The measurement unit shared by all leads, NaN if no channel
            declares one.

    Raises
    ------
    ValueError
        If the channels do not share exactly one unit.
    """
    # #### constants
    CHANNEL_SOURCE_SEQ = 'ChannelSourceSequence'
    CHANNEL_CODE_MEANING = 'CodeMeaning'
    CHANNEL_SENS_UNIT = 'ChannelSensitivityUnitsSequence'
    CODE_MEANING = 'CodeMeaning'
    LEAD = 'Lead'
    # #### containers
    names_by_position = {}
    unit_by_lead = {}
    for position, channel in enumerate(channel_seq):
        # the lead name is taken from the first channel source item
        source_item = getattr(channel, CHANNEL_SOURCE_SEQ)[0]
        source = getattr(source_item, CHANNEL_CODE_MEANING)
        # the unit defaults to NaN when the channel does not declare one
        unit = np.nan
        if hasattr(channel, CHANNEL_SENS_UNIT):
            unit_item = getattr(channel, CHANNEL_SENS_UNIT)[0]
            unit = getattr(unit_item, CODE_MEANING)
        # NOTE(review): `position` is an integer from `enumerate`, so this
        # comparison with a string never holds - presumably the cleaned
        # lead name was meant to be compared; confirm before changing.
        if position == 'I (Einthoven)':
            position = CLeads.I
        names_by_position[position] = source.replace(LEAD, '').strip()
        unit_by_lead[names_by_position[position]] = unit
    # #### all leads should share a single unit
    distinct_units = list(set(unit_by_lead.values()))
    if len(distinct_units) != 1:
        raise ValueError('The ECG leads were measured using different '
                         'units: `{}`.'.format(distinct_units))
    return names_by_position, distinct_units[0]