Variant prioritisation analysis

`AssessVariantPrioritisation`

Class for assessing variant prioritisation based on thresholds and scoring orders.

Source code in src/pheval/analyse/variant_prioritisation_analysis.py

class AssessVariantPrioritisation:
    """Assess how causative variants were prioritised, honouring a score threshold and score order."""

    def __init__(
        self,
        phenopacket_path: Path,
        results_dir: Path,
        standardised_variant_results: List[RankedPhEvalVariantResult],
        threshold: float,
        score_order: str,
        proband_causative_variants: List[GenomicVariant],
    ):
        """
        Store the inputs required to assess variant prioritisation for one phenopacket.

        Args:
            phenopacket_path (Path): Path to the phenopacket file
            results_dir (Path): Path to the results directory
            standardised_variant_results (List[RankedPhEvalVariantResult]): Ranked PhEval variant results
            threshold (float): Score threshold; a value equal to 0.0 means no threshold is applied
            score_order (str): Score order for results, either ascending or descending
            proband_causative_variants (List[GenomicVariant]): Causative variants of the proband
        """
        self.phenopacket_path, self.results_dir = phenopacket_path, results_dir
        self.standardised_variant_results = standardised_variant_results
        self.threshold, self.score_order = threshold, score_order
        self.proband_causative_variants = proband_causative_variants

    def _record_variant_prioritisation_match(
        self,
        result_entry: RankedPhEvalVariantResult,
        rank_stats: RankStats,
    ) -> VariantPrioritisationResult:
        """
        Register the rank of a matched result and wrap it in a prioritisation result.

        Args:
            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
            rank_stats (RankStats): RankStats instance that receives the rank via ``add_rank``

        Returns:
            VariantPrioritisationResult: Result holding the phenopacket path, the matched variant and its rank
        """
        matched_rank = result_entry.rank
        rank_stats.add_rank(matched_rank)
        matched_variant = GenomicVariant(
            chrom=result_entry.chromosome,
            pos=result_entry.start,
            ref=result_entry.ref,
            alt=result_entry.alt,
        )
        return VariantPrioritisationResult(self.phenopacket_path, matched_variant, matched_rank)

    def _assess_variant_with_threshold_ascending_order(
        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
    ) -> VariantPrioritisationResult:
        """
        Record the rank of a result when scores are in ascending order and the threshold is met.

        The rank is recorded only if the entry's score is strictly less than the threshold.

        Args:
            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
            rank_stats (RankStats): RankStats class instance

        Returns:
            VariantPrioritisationResult: Recorded result when the score is below the threshold;
            otherwise None.
        """
        cutoff = float(self.threshold)
        entry_score = float(result_entry.score)
        if entry_score < cutoff:
            return self._record_variant_prioritisation_match(result_entry, rank_stats)
        return None

    def _assess_variant_with_threshold(
        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
    ) -> VariantPrioritisationResult:
        """
        Record the rank of a result when the score exceeds the threshold.

        The rank is recorded only if the entry's score is strictly greater than the threshold.

        Args:
            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
            rank_stats (RankStats): RankStats class instance

        Returns:
            VariantPrioritisationResult: Recorded result when the score is above the threshold;
            otherwise None.
        """
        cutoff = float(self.threshold)
        entry_score = float(result_entry.score)
        if entry_score > cutoff:
            return self._record_variant_prioritisation_match(result_entry, rank_stats)
        return None

    def _record_matched_variant(
        self, rank_stats: RankStats, standardised_variant_result: RankedPhEvalVariantResult
    ) -> VariantPrioritisationResult:
        """
        Produce the rank result for a matched variant, applying the threshold when one is set.

        A threshold equal to 0.0 records the rank unconditionally. Any other threshold is
        checked with the ascending-order rule when the score order is "ascending", and with
        the greater-than rule for every other score order.

        Args:
            rank_stats (RankStats): RankStats class instance
            standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry

        Returns:
            VariantPrioritisationResult: Recorded result, or None when a threshold is set and not met.
        """
        if float(self.threshold) == 0.0:
            # No threshold in effect: every match is recorded.
            return self._record_variant_prioritisation_match(
                standardised_variant_result, rank_stats
            )
        if self.score_order == "ascending":
            return self._assess_variant_with_threshold_ascending_order(
                standardised_variant_result, rank_stats
            )
        return self._assess_variant_with_threshold(standardised_variant_result, rank_stats)

    def assess_variant_prioritisation(
        self,
        rank_stats: RankStats,
        rank_records: defaultdict,
        binary_classification_stats: BinaryClassificationStats,
    ) -> None:
        """
        Assess variant prioritisation for every causative variant of the proband.

        Each causative variant is compared against the ranked results; the first equal result
        is assessed against the threshold and the outcome is passed to a
        PrioritisationRankRecorder. The collected ranks are then appended to the rank
        statistics and handed to the binary classification statistics.

        Args:
            rank_stats (RankStats): RankStats class instance
            rank_records (defaultdict): A defaultdict to store the correct ranked results.
            binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
        """
        ranks_of_interest = []
        for causative in self.proband_causative_variants:
            rank_stats.total += 1
            # Default outcome (no rank supplied) used when no result equals the causative variant.
            outcome = VariantPrioritisationResult(self.phenopacket_path, causative)
            for candidate in self.standardised_variant_results:
                candidate_variant = GenomicVariant(
                    chrom=candidate.chromosome,
                    pos=candidate.start,
                    ref=candidate.ref,
                    alt=candidate.alt,
                )
                # Negated equality (rather than !=) keeps the comparison on __eq__.
                if not causative == candidate_variant:
                    continue
                outcome = self._record_matched_variant(rank_stats, candidate)
                # A match that failed the threshold yields None and is tracked as rank 0.
                if outcome:
                    ranks_of_interest.append(outcome.rank)
                else:
                    ranks_of_interest.append(0)
                break
            if outcome is None:
                # Threshold not met: record the variant without a rank.
                outcome = VariantPrioritisationResult(self.phenopacket_path, causative)
            recorder = PrioritisationRankRecorder(
                rank_stats.total,
                self.results_dir,
                outcome,
                rank_records,
            )
            recorder.record_rank()
        rank_stats.relevant_result_ranks.append(ranks_of_interest)
        binary_classification_stats.add_classification(
            self.standardised_variant_results, ranks_of_interest
        )

`__init__(phenopacket_path, results_dir, standardised_variant_results, threshold, score_order, proband_causative_variants)`

Initialise AssessVariantPrioritisation class

Parameters:

Name	Type	Description	Default
`phenopacket_path`	`Path`	Path to the phenopacket file	required
`results_dir`	`Path`	Path to the results directory	required
`standardised_variant_results`	`List[RankedPhEvalVariantResult]`	List of ranked PhEval variant results	required
`threshold`	`float`	Threshold for scores	required
`score_order`	`str`	Score order for results, either ascending or descending	required
`proband_causative_variants`	`List[GenomicVariant]`	List of proband variants	required

Source code in src/pheval/analyse/variant_prioritisation_analysis.py

def __init__(
    self,
    phenopacket_path: Path,
    results_dir: Path,
    standardised_variant_results: List[RankedPhEvalVariantResult],
    threshold: float,
    score_order: str,
    proband_causative_variants: List[GenomicVariant],
):
    """
    Store the inputs required to assess variant prioritisation for one phenopacket.

    Args:
        phenopacket_path (Path): Path to the phenopacket file
        results_dir (Path): Path to the results directory
        standardised_variant_results (List[RankedPhEvalVariantResult]): Ranked PhEval variant results
        threshold (float): Threshold for scores
        score_order (str): Score order for results, either ascending or descending
        proband_causative_variants (List[GenomicVariant]): Causative variants of the proband
    """
    self.phenopacket_path, self.results_dir = phenopacket_path, results_dir
    self.standardised_variant_results = standardised_variant_results
    self.threshold, self.score_order = threshold, score_order
    self.proband_causative_variants = proband_causative_variants

`assess_variant_prioritisation(rank_stats, rank_records, binary_classification_stats)`

Assess variant prioritisation.

This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder.

Parameters:

Name	Type	Description	Default
`rank_stats`	`RankStats`	RankStats class instance	required
`rank_records`	`defaultdict`	A defaultdict to store the correct ranked results.	required
`binary_classification_stats`	`BinaryClassificationStats`	BinaryClassificationStats class instance.	required

Source code in src/pheval/analyse/variant_prioritisation_analysis.py

def assess_variant_prioritisation(
    self,
    rank_stats: RankStats,
    rank_records: defaultdict,
    binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess variant prioritisation for every causative variant of the proband.

    Each causative variant is compared against the ranked results; the first equal result
    is passed to ``_record_matched_variant`` and the outcome is handed to a
    PrioritisationRankRecorder. The collected ranks are then appended to the rank
    statistics and given to the binary classification statistics.

    Args:
        rank_stats (RankStats): RankStats class instance
        rank_records (defaultdict): A defaultdict to store the correct ranked results.
        binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    ranks_of_interest = []
    for causative in self.proband_causative_variants:
        rank_stats.total += 1
        # Default outcome (no rank supplied) used when no result equals the causative variant.
        outcome = VariantPrioritisationResult(self.phenopacket_path, causative)
        for candidate in self.standardised_variant_results:
            candidate_variant = GenomicVariant(
                chrom=candidate.chromosome,
                pos=candidate.start,
                ref=candidate.ref,
                alt=candidate.alt,
            )
            # Negated equality (rather than !=) keeps the comparison on __eq__.
            if not causative == candidate_variant:
                continue
            outcome = self._record_matched_variant(rank_stats, candidate)
            # A falsy outcome from the matcher is tracked as rank 0.
            if outcome:
                ranks_of_interest.append(outcome.rank)
            else:
                ranks_of_interest.append(0)
            break
        if outcome is None:
            # The matcher returned nothing: record the variant without a rank.
            outcome = VariantPrioritisationResult(self.phenopacket_path, causative)
        recorder = PrioritisationRankRecorder(
            rank_stats.total,
            self.results_dir,
            outcome,
            rank_records,
        )
        recorder.record_rank()
    rank_stats.relevant_result_ranks.append(ranks_of_interest)
    binary_classification_stats.add_classification(
        self.standardised_variant_results, ranks_of_interest
    )

`assess_phenopacket_variant_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats)`

Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket.

Parameters:

Name	Type	Description	Default
`phenopacket_path`	`Path`	Path to the Phenopacket.	required
`score_order`	`str`	The order in which scores are arranged, either ascending or descending.	required
`results_dir_and_input`	`TrackInputOutputDirectories`	Input and output directories.	required
`threshold`	`float`	Threshold for assessment.	required
`variant_rank_stats`	`RankStats`	RankStats class instance.	required
`variant_rank_comparison`	`defaultdict`	Default dictionary for variant rank comparisons.	required
`variant_binary_classification_stats`	`BinaryClassificationStats`	BinaryClassificationStats class instance.	required

Source code in src/pheval/analyse/variant_prioritisation_analysis.py

def assess_phenopacket_variant_prioritisation(
    phenopacket_path: Path,
    score_order: str,
    results_dir_and_input: TrackInputOutputDirectories,
    threshold: float,
    variant_rank_stats: RankStats,
    variant_rank_comparison: defaultdict,
    variant_binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess variant prioritisation for a single Phenopacket.

    The causative variants recorded for the proband are obtained from the Phenopacket, the
    matching standardised variant result file is read from the results directory, and both
    are handed to AssessVariantPrioritisation for assessment.

    Args:
        phenopacket_path (Path): Path to the Phenopacket.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
        threshold (float): Threshold for assessment.
        variant_rank_stats (RankStats): RankStats class instance.
        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
        variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    causative_variants = _obtain_causative_variants(phenopacket_path)
    # The result file name is derived from the phenopacket file stem.
    result_file = results_dir_and_input.results_dir.joinpath(
        f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
    )
    raw_result = read_standardised_result(result_file)
    variant_results_dir = results_dir_and_input.results_dir.joinpath("pheval_variant_results/")
    ranked_results = parse_pheval_result(RankedPhEvalVariantResult, raw_result)
    assessor = AssessVariantPrioritisation(
        phenopacket_path,
        variant_results_dir,
        ranked_results,
        threshold,
        score_order,
        causative_variants,
    )
    assessor.assess_variant_prioritisation(
        variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats
    )

`benchmark_variant_prioritisation(results_directory_and_input, score_order, threshold, variant_rank_comparison)`

Benchmark a directory based on variant prioritisation results.

Parameters:

Name	Type	Description	Default
`results_directory_and_input`	`TrackInputOutputDirectories`	Input and output directories.	required
`score_order`	`str`	The order in which scores are arranged.	required
`threshold`	`float`	Threshold for assessment.	required
`variant_rank_comparison`	`defaultdict`	Default dictionary for variant rank comparisons.	required

Returns:

Name	Type	Description
`BenchmarkRunResults`		An object containing benchmarking results for variant prioritisation,
		including ranks and rank statistics for the benchmarked directory.

Source code in src/pheval/analyse/variant_prioritisation_analysis.py

def benchmark_variant_prioritisation(
    results_directory_and_input: TrackInputOutputDirectories,
    score_order: str,
    threshold: float,
    variant_rank_comparison: defaultdict,
):
    """
    Benchmark variant prioritisation across all phenopackets of a run.

    Every file in the phenopacket directory is assessed in turn; the rank statistics and
    binary classification statistics accumulate over the whole directory.

    Args:
        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
        score_order (str): The order in which scores are arranged.
        threshold (float): Threshold for assessment.
        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.

    Returns:
        BenchmarkRunResults: The results directory, the rank comparison dictionary, the rank
        statistics and the binary classification statistics for the benchmarked directory.
    """
    # Fresh accumulators shared by every phenopacket assessed below.
    accumulated_rank_stats = RankStats()
    accumulated_classification_stats = BinaryClassificationStats()
    phenopacket_files = all_files(results_directory_and_input.phenopacket_dir)
    for packet in phenopacket_files:
        assess_phenopacket_variant_prioritisation(
            phenopacket_path=packet,
            score_order=score_order,
            results_dir_and_input=results_directory_and_input,
            threshold=threshold,
            variant_rank_stats=accumulated_rank_stats,
            variant_rank_comparison=variant_rank_comparison,
            variant_binary_classification_stats=accumulated_classification_stats,
        )
    return BenchmarkRunResults(
        results_dir=results_directory_and_input.results_dir,
        ranks=variant_rank_comparison,
        rank_stats=accumulated_rank_stats,
        binary_classification_stats=accumulated_classification_stats,
    )

Variant prioritisation analysis

AssessVariantPrioritisation

__init__(phenopacket_path, results_dir, standardised_variant_results, threshold, score_order, proband_causative_variants)

assess_variant_prioritisation(rank_stats, rank_records, binary_classification_stats)

assess_phenopacket_variant_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats)

benchmark_variant_prioritisation(results_directory_and_input, score_order, threshold, variant_rank_comparison)

`AssessVariantPrioritisation`

`__init__(phenopacket_path, results_dir, standardised_variant_results, threshold, score_order, proband_causative_variants)`

`assess_variant_prioritisation(rank_stats, rank_records, binary_classification_stats)`

`assess_phenopacket_variant_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats)`

`benchmark_variant_prioritisation(results_directory_and_input, score_order, threshold, variant_rank_comparison)`