Disease prioritisation analysis

`AssessDiseasePrioritisation`

Class for assessing disease prioritisation based on thresholds and scoring orders.

Source code in src/pheval/analyse/disease_prioritisation_analysis.py

class AssessDiseasePrioritisation:
    """
    Assess how known proband diseases are prioritised in ranked PhEval disease results.

    Each proband disease is looked up in the ranked results by identifier or name.
    The rank of the first matching entry is recorded, optionally subject to a score
    threshold whose direction depends on the score order.
    """

    def __init__(
        self,
        phenopacket_path: Path,
        results_dir: Path,
        standardised_disease_results: List[RankedPhEvalDiseaseResult],
        threshold: float,
        score_order: str,
        proband_diseases: List[ProbandDisease],
    ):
        """
        Store the inputs needed to assess disease prioritisation for one phenopacket.

        Args:
            phenopacket_path (Path): Path to the phenopacket file
            results_dir (Path): Path to the results directory
            standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results
            threshold (float): Threshold for scores; a value of 0.0 disables threshold filtering
            score_order (str): Score order for results, either ascending or descending
            proband_diseases (List[ProbandDisease]): List of proband diseases

        """
        # Where the data comes from and where ranks are recorded.
        self.phenopacket_path = phenopacket_path
        self.results_dir = results_dir
        # What is compared: known diagnoses against the ranked tool output.
        self.proband_diseases = proband_diseases
        self.standardised_disease_results = standardised_disease_results
        # How a match is filtered by score.
        self.threshold = threshold
        self.score_order = score_order

    def _record_disease_prioritisation_match(
        self,
        disease: ProbandDisease,
        result_entry: RankedPhEvalDiseaseResult,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Register the rank of a matched result entry and wrap it in a result object.

        Args:
            disease (ProbandDisease): Diagnosed proband disease
            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
            rank_stats (RankStats): RankStats class instance, updated via ``add_rank``

        Returns:
            DiseasePrioritisationResult: Result holding the phenopacket path, the disease and its rank
        """
        matched_rank = result_entry.rank
        rank_stats.add_rank(matched_rank)
        return DiseasePrioritisationResult(self.phenopacket_path, disease, matched_rank)

    def _assess_disease_with_threshold_ascending_order(
        self,
        result_entry: RankedPhEvalDiseaseResult,
        disease: ProbandDisease,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Record the disease rank only if the entry's score is strictly below the threshold.

        Used when scores are in ascending order. Threshold and score are both
        converted with ``float`` before being compared.

        Args:
            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance

        Returns:
            DiseasePrioritisationResult: Recorded rank result when the score is below the
            threshold; ``None`` when the score is equal to or above it.
        """
        score_is_below_threshold = float(self.threshold) > float(result_entry.score)
        if score_is_below_threshold:
            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
        return None

    def _assess_disease_with_threshold(
        self,
        result_entry: RankedPhEvalDiseaseResult,
        disease: ProbandDisease,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Record the disease rank only if the entry's score is strictly above the threshold.

        Used when scores are not in ascending order. Threshold and score are both
        converted with ``float`` before being compared.

        Args:
            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance

        Returns:
            DiseasePrioritisationResult: Recorded rank result when the score is above the
            threshold; ``None`` when the score is equal to or below it.
        """
        score_is_above_threshold = float(self.threshold) < float(result_entry.score)
        if score_is_above_threshold:
            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
        return None

    def _record_matched_disease(
        self,
        disease: ProbandDisease,
        rank_stats: RankStats,
        standardised_disease_result: RankedPhEvalDiseaseResult,
    ) -> DiseasePrioritisationResult:
        """
        Return the rank result for a matched disease, honouring the configured threshold.

        A threshold of 0.0 means no filtering: the rank is recorded directly.
        Otherwise the entry is checked against the threshold, using the ascending-order
        check when ``score_order`` is "ascending" and the greater-than check for any
        other score order.

        Args:
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance
            standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry

        Returns:
            DiseasePrioritisationResult: Recorded rank result, or ``None`` when a non-zero
            threshold is set and the entry's score does not pass it.
        """
        if float(self.threshold) == 0.0:
            return self._record_disease_prioritisation_match(
                disease, standardised_disease_result, rank_stats
            )
        if self.score_order != "ascending":
            return self._assess_disease_with_threshold(
                standardised_disease_result, disease, rank_stats
            )
        return self._assess_disease_with_threshold_ascending_order(
            standardised_disease_result, disease, rank_stats
        )

    def assess_disease_prioritisation(
        self,
        rank_stats: RankStats,
        rank_records: defaultdict,
        binary_classification_stats: BinaryClassificationStats,
    ) -> None:
        """
        Assess disease prioritisation.

        For every proband disease, ``rank_stats.total`` is incremented and the first
        result entry sharing the disease identifier or the disease name is looked up.
        The outcome is passed to a PrioritisationRankRecorder; a disease that has no
        matching entry, or whose entry fails the threshold, is recorded with a
        DiseasePrioritisationResult built without a rank.

        Args:
            rank_stats (RankStats): RankStats class instance
            rank_records (defaultdict): A defaultdict to store the correct ranked results.
            binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
        """
        relevant_ranks = []
        for disease in self.proband_diseases:
            rank_stats.total += 1
            unranked_result = DiseasePrioritisationResult(self.phenopacket_path, disease)
            disease_match = unranked_result
            # Only the first matching entry is considered, as in a loop with ``break``.
            matching_entry = next(
                (
                    entry
                    for entry in self.standardised_disease_results
                    if disease.disease_identifier == entry.disease_identifier
                    or disease.disease_name == entry.disease_name
                ),
                None,
            )
            if matching_entry is not None:
                disease_match = self._record_matched_disease(disease, rank_stats, matching_entry)
                # A matched entry that fails the threshold contributes a rank of 0;
                # a disease with no matching entry contributes nothing to this list.
                relevant_ranks.append(disease_match.rank if disease_match else 0)
            PrioritisationRankRecorder(
                rank_stats.total,
                self.results_dir,
                unranked_result if disease_match is None else disease_match,
                rank_records,
            ).record_rank()
        rank_stats.relevant_result_ranks.append(relevant_ranks)
        binary_classification_stats.add_classification(
            self.standardised_disease_results, relevant_ranks
        )

`__init__(phenopacket_path, results_dir, standardised_disease_results, threshold, score_order, proband_diseases)`

Initialise AssessDiseasePrioritisation class

Parameters:

Name	Type	Description	Default
`phenopacket_path`	`Path`	Path to the phenopacket file	required
`results_dir`	`Path`	Path to the results directory	required
`standardised_disease_results`	`List[RankedPhEvalDiseaseResult]`	List of ranked PhEval disease results	required
`threshold`	`float`	Threshold for scores	required
`score_order`	`str`	Score order for results, either ascending or descending	required
`proband_diseases`	`List[ProbandDisease]`	List of proband diseases	required

Source code in src/pheval/analyse/disease_prioritisation_analysis.py

def __init__(
    self,
    phenopacket_path: Path,
    results_dir: Path,
    standardised_disease_results: List[RankedPhEvalDiseaseResult],
    threshold: float,
    score_order: str,
    proband_diseases: List[ProbandDisease],
):
    """
    Store the inputs needed to assess disease prioritisation for one phenopacket.

    Args:
        phenopacket_path (Path): Path to the phenopacket file
        results_dir (Path): Path to the results directory
        standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results
        threshold (float): Threshold for scores
        score_order (str): Score order for results, either ascending or descending
        proband_diseases (List[ProbandDisease]): List of proband diseases

    """
    # Where the data comes from and where ranks are recorded.
    self.phenopacket_path = phenopacket_path
    self.results_dir = results_dir
    # What is compared: known diagnoses against the ranked tool output.
    self.proband_diseases = proband_diseases
    self.standardised_disease_results = standardised_disease_results
    # How a match is filtered by score.
    self.threshold = threshold
    self.score_order = score_order

`assess_disease_prioritisation(rank_stats, rank_records, binary_classification_stats)`

Assess disease prioritisation.

This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder.

Parameters:

Name	Type	Description	Default
`rank_stats`	`RankStats`	RankStats class instance	required
`rank_records`	`defaultdict`	A defaultdict to store the correct ranked results.	required
`binary_classification_stats`	`BinaryClassificationStats`	BinaryClassificationStats class instance.	required

Source code in src/pheval/analyse/disease_prioritisation_analysis.py

def assess_disease_prioritisation(
    self,
    rank_stats: RankStats,
    rank_records: defaultdict,
    binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess disease prioritisation.

    For every proband disease, ``rank_stats.total`` is incremented and the first
    result entry sharing the disease identifier or the disease name is looked up.
    The outcome is passed to a PrioritisationRankRecorder; a disease that has no
    matching entry, or whose matched result comes back as ``None``, is recorded
    with a DiseasePrioritisationResult built without a rank.

    Args:
        rank_stats (RankStats): RankStats class instance
        rank_records (defaultdict): A defaultdict to store the correct ranked results.
        binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    relevant_ranks = []
    for disease in self.proband_diseases:
        rank_stats.total += 1
        unranked_result = DiseasePrioritisationResult(self.phenopacket_path, disease)
        disease_match = unranked_result
        # Only the first matching entry is considered, as in a loop with ``break``.
        matching_entry = next(
            (
                entry
                for entry in self.standardised_disease_results
                if disease.disease_identifier == entry.disease_identifier
                or disease.disease_name == entry.disease_name
            ),
            None,
        )
        if matching_entry is not None:
            disease_match = self._record_matched_disease(disease, rank_stats, matching_entry)
            # A falsy match result contributes a rank of 0; a disease with no
            # matching entry contributes nothing to this list.
            relevant_ranks.append(disease_match.rank if disease_match else 0)
        PrioritisationRankRecorder(
            rank_stats.total,
            self.results_dir,
            unranked_result if disease_match is None else disease_match,
            rank_records,
        ).record_rank()
    rank_stats.relevant_result_ranks.append(relevant_ranks)
    binary_classification_stats.add_classification(
        self.standardised_disease_results, relevant_ranks
    )

`assess_phenopacket_disease_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats)`

Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket.

Parameters:

Name	Type	Description	Default
`phenopacket_path`	`Path`	Path to the Phenopacket.	required
`score_order`	`str`	The order in which scores are arranged, either ascending or descending.	required
`results_dir_and_input`	`TrackInputOutputDirectories`	Input and output directories.	required
`threshold`	`float`	Threshold for assessment.	required
`disease_rank_stats`	`RankStats`	RankStats class instance.	required
`disease_rank_comparison`	`defaultdict`	Default dictionary for disease rank comparisons.	required
`disease_binary_classification_stats`	`BinaryClassificationStats`	BinaryClassificationStats class instance.	required

Source code in src/pheval/analyse/disease_prioritisation_analysis.py

def assess_phenopacket_disease_prioritisation(
    phenopacket_path: Path,
    score_order: str,
    results_dir_and_input: TrackInputOutputDirectories,
    threshold: float,
    disease_rank_stats: RankStats,
    disease_rank_comparison: defaultdict,
    disease_binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess disease prioritisation for a single Phenopacket.

    The standardised disease result file for the Phenopacket (named after the
    Phenopacket's stem, inside ``pheval_disease_results``) is read and parsed, the
    proband's causative diseases are obtained from the Phenopacket, and both are
    handed to AssessDiseasePrioritisation, which updates the supplied statistics.

    Args:
        phenopacket_path (Path): Path to the Phenopacket.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
        threshold (float): Threshold for assessment.
        disease_rank_stats (RankStats): RankStats class instance.
        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
        disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    results_dir = results_dir_and_input.results_dir
    result_file = results_dir.joinpath(
        f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
    )
    raw_disease_results = read_standardised_result(result_file)
    causative_diseases = _obtain_causative_diseases(phenopacket_path)
    assessor = AssessDiseasePrioritisation(
        phenopacket_path,
        results_dir.joinpath("pheval_disease_results/"),
        parse_pheval_result(RankedPhEvalDiseaseResult, raw_disease_results),
        threshold,
        score_order,
        causative_diseases,
    )
    assessor.assess_disease_prioritisation(
        disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats
    )

`benchmark_disease_prioritisation(results_directory_and_input, score_order, threshold, disease_rank_comparison)`

Benchmark a directory based on disease prioritisation results.

Parameters:

Name	Type	Description	Default
`results_directory_and_input`	`TrackInputOutputDirectories`	Input and output directories.	required
`score_order`	`str`	The order in which scores are arranged.	required
`threshold`	`float`	Threshold for assessment.	required
`disease_rank_comparison`	`defaultdict`	Default dictionary for disease rank comparisons.	required

Returns:

Type	Description
`BenchmarkRunResults`	An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory.

Source code in src/pheval/analyse/disease_prioritisation_analysis.py

def benchmark_disease_prioritisation(
    results_directory_and_input: TrackInputOutputDirectories,
    score_order: str,
    threshold: float,
    disease_rank_comparison: defaultdict,
) -> BenchmarkRunResults:
    """
    Benchmark a directory based on disease prioritisation results.

    Fresh RankStats and BinaryClassificationStats instances are created and then
    updated by assessing every file returned by ``all_files`` for the phenopacket
    directory.

    Args:
        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
        score_order (str): The order in which scores are arranged.
        threshold (float): Threshold for assessment.
        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons;
            passed to each per-phenopacket assessment and returned as the ``ranks`` of the result.

    Returns:
        BenchmarkRunResults: An object containing benchmarking results for disease prioritisation,
        including ranks and rank statistics for the benchmarked directory.
    """
    disease_rank_stats = RankStats()
    disease_binary_classification_stats = BinaryClassificationStats()
    # Both stats objects are shared across phenopackets so they accumulate over the run.
    for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
        assess_phenopacket_disease_prioritisation(
            phenopacket_path,
            score_order,
            results_directory_and_input,
            threshold,
            disease_rank_stats,
            disease_rank_comparison,
            disease_binary_classification_stats,
        )
    return BenchmarkRunResults(
        results_dir=results_directory_and_input.results_dir,
        ranks=disease_rank_comparison,
        rank_stats=disease_rank_stats,
        binary_classification_stats=disease_binary_classification_stats,
    )

Disease prioritisation analysis

AssessDiseasePrioritisation

__init__(phenopacket_path, results_dir, standardised_disease_results, threshold, score_order, proband_diseases)

assess_disease_prioritisation(rank_stats, rank_records, binary_classification_stats)

assess_phenopacket_disease_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats)

benchmark_disease_prioritisation(results_directory_and_input, score_order, threshold, disease_rank_comparison)

`AssessDiseasePrioritisation`

`__init__(phenopacket_path, results_dir, standardised_disease_results, threshold, score_order, proband_diseases)`

`assess_disease_prioritisation(rank_stats, rank_records, binary_classification_stats)`

`assess_phenopacket_disease_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats)`

`benchmark_disease_prioritisation(results_directory_and_input, score_order, threshold, disease_rank_comparison)`