Source code for gpsea.preprocessing._patient

import abc

import typing

from stairval.notepad import Notepad

from gpsea.model import Patient, Cohort

T = typing.TypeVar('T')
"""
The input for `PatientCreator`.

It can be any object that contains the patient data (e.g. a phenopacket).
"""


[docs] class PatientCreator(typing.Generic[T], metaclass=abc.ABCMeta): """ `PatientCreator` can create a `Patient` from some input `T`. """
[docs] @abc.abstractmethod def process( self, item: T, notepad: Notepad, ) -> typing.Optional[Patient]: pass
[docs] class CohortCreator(typing.Generic[T]): """ `CohortCreator` creates a cohort from an iterable of some `T` where `T` represents a cohort member. """ def __init__( self, patient_creator: PatientCreator[T], ): # Check that we're getting a `PatientCreator`. # Unfortunately, we cannot check that `T`s of `PatientCreator` and `CohortCreator` actually match # due to Python's loosey-goosey nature. assert isinstance(patient_creator, PatientCreator) self._pc = patient_creator
[docs] def process( self, inputs: typing.Iterable[T], notepad: Notepad, ) -> Cohort: patients = [] patient_labels = set() duplicate_pat_labels = set() for i, pp in enumerate(inputs): sub = notepad.add_subsection(f'patient #{i}') patient = self._pc.process(pp, sub) if patient is not None: if patient.labels in patient_labels: duplicate_pat_labels.add(patient.labels) patient_labels.add(patient.labels) patients.append(patient) # What happens if a sample has if len(duplicate_pat_labels) > 0: label_summaries = [d.label_summary() for d in duplicate_pat_labels] label_summaries.sort() notepad.add_error( f"Patient ID/s {', '.join(label_summaries)} have a duplicate", "Please verify every patient has an unique ID.", ) # We should have >1 patients in the cohort, right? if len(patients) <= 1: notepad.add_error( f'Cohort must include {len(patients)}>1 members', 'Fix issues in patients to enable the analysis', ) return Cohort.from_patients(patients)