Source code for gpsea.analysis.pscore._hpo

import typing

import hpotk

from gpsea.model import Patient
from ._api import PhenotypeScorer

"""
A module with HPO-driven phenotype scorers. The scorers generally work with the phenotypes of an individual.
"""



[docs]
class CountingPhenotypeScorer(PhenotypeScorer):
    """
    `CountingPhenotypeScorer` assigns the patient with a phenotype score
    that is equivalent to the count of query terms for which the patient
    has at least one present phenotype that is either an exact match
    to the query term or one of its descendants.

    For instance, we may want to count whether an individual has brain, liver, kidney, and skin abnormalities.
    In this case, the query would include the corresponding terms (e.g., Abnormal brain morphology HP:0012443).
    An individual can then have between 0 and 4 phenotype group abnormalities.
    This scorer is intended to be used with the Mann Whitney U test.
    """

    @staticmethod
    def from_query_curies(
        hpo: hpotk.MinimalOntology,
        query: typing.Iterable[typing.Union[str, hpotk.TermId]],
    ) -> "CountingPhenotypeScorer":
        """
        Create a scorer to test for the number of phenotype terms that fall into the phenotype groups.

        :param hpo: HPO as represented by :class:`~hpotk.ontology.MinimalOntology` of HPO toolkit.
        :param query: an iterable of the top-level terms, either represented as CURIEs (`str`)
          or as term IDs.
        :raises ValueError: if a query item is neither a `str` nor a term ID,
          if a query term is not found in `hpo`, if `query` is empty,
          or if `query` includes both a term and one of its ancestors.
        """
        query_term_ids = set()
        for item in query:
            # Normalize the query item to a term ID.
            if isinstance(item, str):
                term_id = hpotk.TermId.from_curie(item)
            elif isinstance(item, hpotk.TermId):
                term_id = item
            else:
                raise ValueError(
                    f"query argument must be iterable of hpotk TermId's or strings but we found {type(item)}"
                )

            # The term ID must be known to the ontology.
            if term_id not in hpo:
                raise ValueError(f"The query {term_id} was not found in the HPO")
            query_term_ids.add(term_id)

        if not query_term_ids:
            raise ValueError("`query` must not be empty")

        # The query terms must not include a term and its ancestor,
        # otherwise a single phenotype could be counted for two groups.
        for term_id in query_term_ids:
            for anc in hpo.graph.get_ancestors(term_id):
                if anc in query_term_ids:
                    raise ValueError(
                        f"Both {term_id} and its ancestor term {anc} were found in the query, "
                        "but query terms must not include a term and its ancestor"
                    )

        return CountingPhenotypeScorer(
            hpo=hpo,
            query=query_term_ids,
        )

    def __init__(
        self,
        hpo: hpotk.MinimalOntology,
        query: typing.Iterable[hpotk.TermId],
    ):
        """
        :param hpo: the ontology used to look up the ancestors of the patient's phenotypes.
        :param query: the term IDs of the phenotype groups to count; duplicates are collapsed.
        """
        self._hpo = hpo
        self._query = set(query)

    @property
    def name(self) -> str:
        return "HPO Group Count"

    @property
    def description(self) -> str:
        return (
            "Assign a phenotype score that is equivalent to the count "
            "of present phenotypes that are either an exact match to "
            "the query terms or their descendants"
        )

    @property
    def variable_name(self) -> str:
        return "HPO group count"

    def score(
        self,
        patient: Patient,
    ) -> float:
        """
        Get the count (number) of terms in the query set
        that have matching terms (exact matches or descendants) in the affected individual.
        Do not double count if the individual has two terms
        (e.g., two different descendants) of one of the query terms.

        :param patient: the individual whose present phenotypes are scored.
        :returns: the number of matched query terms, between 0 and the size of the query set.
        """
        # Track the query terms that have not been matched yet. Each present phenotype
        # is expanded to its ancestors only once, instead of once per query term.
        unmatched = set(self._query)
        for pf in patient.present_phenotypes():
            if not unmatched:
                # Every query term is already accounted for.
                break
            hpo_id = pf.identifier
            # Exact match.
            unmatched.discard(hpo_id)
            # The phenotype is a descendant of every query term found among its ancestors.
            unmatched.difference_update(self._hpo.graph.get_ancestors(hpo_id))

        # By construction, the count cannot exceed the number of query terms.
        return len(self._query) - len(unmatched)





[docs]
class DeVriesPhenotypeScorer(PhenotypeScorer):
    """
    `DeVriesPhenotypeScorer` computes "adapted De Vries Score"
    as described in `Feenstra et al. <https://pubmed.ncbi.nlm.nih.gov/21712853>`_.

    The score is the sum of five components computed from the present phenotypes
    of an individual: developmental delay, postnatal growth, facial dysmorphism,
    non-facial dysmorphism/congenital abnormalities, and prenatal growth.

    See more in :ref:`devries-scorer` section.
    """

    def __init__(
        self,
        hpo: hpotk.MinimalOntology,
    ):
        """
        Args:
            hpo: the ontology used to test if an observed term descends from a target term
        """
        self._hpo = hpo

        # Global developmental delay (GDD) terms mapped to their score:
        # 2 points for the first row, 1 point for the second row.
        self._gdd_tids = {
            'HP:0011344': 2, 'HP:0012736': 2,
            'HP:0011342': 1, 'HP:0011343': 1, 'HP:0001263': 1,
        }

        # Intellectual disability (ID) terms mapped to their score:
        # 2 points for the first row, 1 point for the second row, 0.5 for the last term.
        self._idd_tids = {
            'HP:0010864': 2, 'HP:0002187': 2,
            'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1,
            'HP:0006889': 0.5,
        }

    @property
    def name(self) -> str:
        return "De Vries Score"

    @property
    def description(self) -> str:
        return (
            "A phenotypic severity score for individuals with intellectual disability"
        )

    @property
    def variable_name(self) -> str:
        return "De Vries score"

    def _developmental_delay_score(
        self,
        observed_term_ids: typing.Collection[str],
    ) -> float:
        """
        Calculate the dev delay component of the score.

        GDD terms are checked with higher priority than ID terms. Within a category,
        the highest score among the observed terms is used, so the result
        does not depend on the order of the observed terms.

        Args:
            observed_term_ids: terms observed in patient

        Returns: a score between 0 and 2
        """
        for scores_by_tid in (self._gdd_tids, self._idd_tids):
            matched = [scores_by_tid[t] for t in observed_term_ids if t in scores_by_tid]
            if matched:
                return max(matched)

        return 0

    def _term_or_descendant_count(
        self,
        target_tid: str,
        observed_term_ids: typing.Iterable[str],
    ) -> int:
        """
        Test if the patient has the target term or any of its descendants.

        Args:
            target_tid: term of interest
            observed_term_ids: all terms observed in patient

        Returns:
            1 if at least one term is equal to or descending from the target_tid, otherwise 0
        """
        for term_id in observed_term_ids:
            if term_id == target_tid or self._hpo.graph.is_descendant_of(term_id, target_tid):
                return 1
        return 0

    def _count_matched_targets(
        self,
        target_tids: typing.Iterable[str],
        observed_term_ids: typing.Collection[str],
    ) -> int:
        """
        Count the target terms that are matched (exactly or by a descendant) by the observed terms.

        Args:
            target_tids: terms of interest
            observed_term_ids: all terms observed in patient

        Returns:
            the number of target terms with at least one match
        """
        return sum(
            self._term_or_descendant_count(target_tid=tid, observed_term_ids=observed_term_ids)
            for tid in target_tids
        )

    def _postnatal_growth_score(
        self,
        observed_term_ids: typing.Collection[str],
    ) -> int:
        """
        Calculate the postnatal growth component of the score.

        One point is assigned for each of microcephaly, short stature, macrocephaly,
        and tall stature (or their descendants).

        Args:
            observed_term_ids: terms observed in patient

        Returns: an `int` (between 0 and 2)

        Raises:
            ValueError: if more than two of the four categories are matched
        """
        microcephaly = 'HP:0000252'
        short_stature = 'HP:0004322'
        macrocephaly = 'HP:0000256'
        tall_stature = 'HP:0000098'

        total_count = self._count_matched_targets(
            (microcephaly, short_stature, macrocephaly, tall_stature),
            observed_term_ids,
        )
        if total_count > 2:
            raise ValueError(f"Inconsistent annotations for postnatal growth score {total_count}:  {observed_term_ids}")
        return total_count

    def _facial_dysmorphism_score(
        self,
        observed_term_ids: typing.Collection[str],
    ) -> int:
        """
        This section assigns two points if anomalies are identified in two or more of the following
        categories: globe location, lip, external nose, pinna, facial shape, midface, and chin.
        A category is counted once if the patient has its term or any descendant of the term.

        Args:
            observed_term_ids: terms observed in patient

        Returns: facial dysmorphism score (either 0 or 2)

        """
        categories = (
            'HP:0100886',  # globe location; includes Hypertelorism and others
            'HP:0000159',  # Abnormal lip morphology
            'HP:0010938',  # external nose
            'HP:0000377',  # pinna morphology
            'HP:0001999',  # Abnormal facial shape
            'HP:0000309',  # Abnormal midface morphology
            'HP:0000306',  # Abnormality of the chin
        )

        total_count = self._count_matched_targets(categories, observed_term_ids)
        return 2 if total_count > 1 else 0

    def _congenital_score(
        self,
        observed_term_ids: typing.Collection[str],
    ) -> int:
        """
        Non-facial dysmorphism and congenital abnormalities component.
        One point is assigned for either the corresponding HPO terms or any of their descendants up to a maximum of 2.

        Args:
            observed_term_ids:  terms observed in patient

        Returns:   Non-facial dysmorphism and congenital abnormalities score (between 0 and 2)

        """
        categories = (
            'HP:0000811',  # Abnormal external genitalia
            'HP:0005922',  # abnormal hand morphology
            'HP:0001627',  # abnormal heart morphology
        )

        total_count = self._count_matched_targets(categories, observed_term_ids)
        return min(2, total_count)

    def _prenatal_growth_score(
        self,
        observed_term_ids: typing.Iterable[str],
    ) -> int:
        """
        Two points are assigned if Prenatal-onset growth retardation is present.

        Only exact matches to the two target terms are considered.

        Args:
            observed_term_ids: terms observed in patient

        Returns: either 0 or 2

        """
        small_for_gestational_age = 'HP:0001518'
        intrauterine_growth_retardation = 'HP:0001511'
        targets = (small_for_gestational_age, intrauterine_growth_retardation)
        # NOTE(review): unlike the other components, descendants of the targets
        # do not count here - confirm this is intended.
        if any(tid in targets for tid in observed_term_ids):
            return 2
        return 0

    def score(self, patient: Patient) -> float:
        """
        Calculate the score based on the present phenotypes of the patient.

        Args:
            patient: the individual to score

        Returns: de Vries score between 0 and 10

        """
        # The component scorers work with CURIE strings and iterate the terms
        # several times, hence the tuple.
        observed_term_ids = tuple(pf.identifier.value for pf in patient.present_phenotypes())

        delay_score = self._developmental_delay_score(observed_term_ids)
        growth_score = self._postnatal_growth_score(observed_term_ids)
        facial_score = self._facial_dysmorphism_score(observed_term_ids)
        congen_score = self._congenital_score(observed_term_ids)
        prenatal_score = self._prenatal_growth_score(observed_term_ids)

        return delay_score + growth_score + facial_score + congen_score + prenatal_score