This class calculates the age of onset annotations for the HPOA file
Source code in pyphetools/visualization/onset_calculator.py
class OnsetCalculator:
    """
    This class calculates the age of onset annotations for the HPOA file.

    On construction it walks a list of phenopackets and, for each one whose
    first disease element carries an ``onset``, converts that onset into an
    ``HpTerm`` and stores it in a dictionary keyed by the id of the
    phenopacket's single external reference (referred to as the "pmid" in
    this class). Phenopackets that do not have exactly one external reference
    are stored under the key ``None``.
    """

    def __init__(self, phenopacket_list):
        """
        Collect the HPO onset terms of all phenopackets in ``phenopacket_list``.

        :param phenopacket_list: list of phenopacket messages; each is expected to
            expose ``meta_data.external_references`` and ``diseases``
        :raises ValueError: if ``phenopacket_list`` is not a list, or if an onset
            is present but is none of ontology_class, age, or gestational_age
        """
        if not isinstance(phenopacket_list, list):
            raise ValueError(f"Malformed individual_list argument -- needs to be list but was {type(phenopacket_list)} ")
        self._pmid_to_onsetlist_d = defaultdict(list)
        for ppack in phenopacket_list:
            mdata = ppack.meta_data
            pmid = None
            if len(mdata.external_references) == 1:
                eref = mdata.external_references[0]
                pmid = eref.id
            else:
                print("Warning: Could not identify pmid")
            if len(ppack.diseases) == 0:
                print("Warning: Could not identify disease element")
                # nothing to extract an onset from; indexing diseases[0] would raise IndexError
                continue
            elif len(ppack.diseases) > 1:
                print("Warning: Identified multiple disease element")
            # only the first disease element is consulted, even if several are present
            disease = ppack.diseases[0]
            if not disease.HasField("onset"):
                continue
            # onset is a GA4GH TimeElement
            # In pyphetools, it can be an OntologyClass, an Age, or a GestationalAge
            onset = disease.onset
            if onset.HasField("ontology_class"):
                onset_term = onset.ontology_class
                hpo_onset_term = HpTerm(hpo_id=onset_term.id, label=onset_term.label)
            elif onset.HasField("age"):
                hpo_onset_term = self._get_hpo_onset_term_from_iso8601(onset.age.iso8601duration)
            elif onset.HasField("gestational_age"):
                # pass the GestationalAge message itself: the helper reads its ``weeks`` field
                hpo_onset_term = self._get_hpo_onset_term_from_gestational_age(onset.gestational_age)
            else:
                raise ValueError(f"onset was present but could not be decoded: {onset}")
            self._pmid_to_onsetlist_d[pmid].append(hpo_onset_term)

    def _hpo_term_from_label(self, label):
        """
        Build the HpTerm for an onset label by looking up its id in HPO_ONSET_TERMS.

        :param label: onset label, e.g. "Juvenile onset"
        :raises ValueError: if the label is not a key of HPO_ONSET_TERMS
        """
        if label not in HPO_ONSET_TERMS:
            # should never happen ...
            raise ValueError(f"Could not identify onset label {label}")
        hpo_id = HPO_ONSET_TERMS.get(label)
        return HpTerm(hpo_id=hpo_id, label=label)

    def _get_hpo_onset_term_from_iso8601(self, isostring):
        """
        Map an ISO 8601 duration such as "P3Y2M" to an HPO onset term.

        Only the years, months and days components are inspected; the largest
        non-zero unit decides the onset category.

        :param isostring: ISO 8601 duration string starting with "P"
        :raises ValueError: if ``isostring`` does not start with "P", or if the
            resulting label is not in HPO_ONSET_TERMS
        """
        # the following regex gets years, months, days - optionally (when we get to this point in pyphetools, we cannot have weeks)
        ISO8601_REGEX = r"^P(\d+Y)?(\d+M)?(\d+D)?"
        match = re.search(ISO8601_REGEX, isostring)
        if not match:
            # without this guard y/m/d would be unbound below
            raise ValueError(f"[ERROR] Could not parse iso8601 \"{isostring}\"")
        # a missing component counts as zero; strip the trailing unit letter before converting
        y = int((match.group(1) or "0Y")[0:-1])
        m = int((match.group(2) or "0M")[0:-1])
        d = int((match.group(3) or "0D")[0:-1])
        if y >= 60:
            label = "Late onset"
        elif y >= 40:
            label = "Middle age onset"
        elif y >= 16:
            label = "Young adult onset"
        elif y >= 5:
            label = "Juvenile onset"
        elif y >= 1:
            label = "Childhood onset"
        elif m >= 1:
            label = "Infantile onset"
        elif d >= 1:
            # NOTE(review): any day count without months maps here, including values above 28 -- confirm this is intended
            label = "Neonatal onset"
        else:
            # years, months and days are all zero
            label = "Congenital onset"
        return self._hpo_term_from_label(label)

    def _get_hpo_onset_term_from_gestational_age(self, gestational_age):
        """
        Map a gestational age to an HPO prenatal onset term based on completed weeks.

        :param gestational_age: object with an integer ``weeks`` attribute
        :raises ValueError: if the resulting label is not in HPO_ONSET_TERMS
        """
        weeks = gestational_age.weeks
        # days not relevant to identifying the HPO Onset term
        if weeks >= 28:
            # prior to birth during the third trimester, which is defined as 28 weeks and zero days (28+0) of gestation and beyond.
            label = "Third trimester onset"  # HP:0034197
        elif weeks >= 14:
            # prior to birth during the second trimester, which comprises the range of gestational ages from 14 0/7 weeks to 27 6/7 (inclusive).
            label = "Second trimester onset"  # HP:0034198
        elif weeks >= 11:
            # 11 0/7 to 13 6/7 weeks of gestation (inclusive).
            label = "Late first trimester onset"  # HP:0034199
        else:
            # everything earlier than 11 weeks
            label = "Embryonal onset"
        return self._hpo_term_from_label(label)

    def get_pmid_to_onset_d(self)-> Dict[str, List[HpTerm]]:
        """
        Return the dictionary built in the constructor.

        :returns: mapping from external-reference id (or None when it could not be
            identified) to the list of HpTerm onset terms collected for that id
        """
        return self._pmid_to_onsetlist_d
|