Skip to content

Variant prioritisation analysis

AssessVariantPrioritisation

Class for assessing variant prioritisation based on thresholds and scoring orders.

Source code in src/pheval/analyse/variant_prioritisation_analysis.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class AssessVariantPrioritisation:
    """Assess how known causative variants were ranked within a set of variant results.

    A matched result's rank is recorded either unconditionally (threshold of 0.0) or only
    when its score passes the threshold in the direction implied by the score order.
    """

    def __init__(
        self,
        phenopacket_path: Path,
        results_dir: Path,
        standardised_variant_results: List[RankedPhEvalVariantResult],
        threshold: float,
        score_order: str,
        proband_causative_variants: List[GenomicVariant],
    ):
        """
        Store everything needed to assess a single phenopacket.

        Args:
            phenopacket_path (Path): Path to the phenopacket file
            results_dir (Path): Path to the results directory
            standardised_variant_results (List[RankedPhEvalVariantResult]): Ranked PhEval variant
                results to search for the causative variants
            threshold (float): Score threshold; a value equal to 0.0 disables threshold filtering
            score_order (str): "ascending" selects the below-threshold comparison, any other
                value selects the above-threshold comparison
            proband_causative_variants (List[GenomicVariant]): Causative variants to look for

        """
        # What is being assessed.
        self.phenopacket_path = phenopacket_path
        self.proband_causative_variants = proband_causative_variants
        # What it is assessed against, and the directory handed to the rank recorder.
        self.results_dir = results_dir
        self.standardised_variant_results = standardised_variant_results
        # How scores are interpreted.
        self.threshold = threshold
        self.score_order = score_order

    def _record_variant_prioritisation_match(
        self,
        result_entry: RankedPhEvalVariantResult,
        rank_stats: RankStats,
    ) -> VariantPrioritisationResult:
        """
        Register the rank of a matched result and wrap it as a prioritisation result.

        Args:
            result_entry (RankedPhEvalVariantResult): Result entry that matched a causative variant
            rank_stats (RankStats): Statistics object that receives the rank through add_rank
        Returns:
            VariantPrioritisationResult: The phenopacket path, the matched variant and its rank
        """
        matched_rank = result_entry.rank
        rank_stats.add_rank(matched_rank)
        matched_variant = GenomicVariant(
            chrom=result_entry.chromosome,
            pos=result_entry.start,
            ref=result_entry.ref,
            alt=result_entry.alt,
        )
        return VariantPrioritisationResult(self.phenopacket_path, matched_variant, matched_rank)

    def _assess_variant_with_threshold_ascending_order(
        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
    ) -> VariantPrioritisationResult:
        """
        Record the rank of a result when scores are ordered ascending.

        The rank is only recorded when the entry's score is strictly below the threshold.

        Args:
            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
            rank_stats (RankStats): RankStats class instance

        Returns:
            VariantPrioritisationResult: The recorded result, or None when the score does not
            fall below the threshold
        """
        cutoff = float(self.threshold)
        if not (cutoff > float(result_entry.score)):
            return None
        return self._record_variant_prioritisation_match(result_entry, rank_stats)

    def _assess_variant_with_threshold(
        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
    ) -> VariantPrioritisationResult:
        """
        Record the rank of a result when scores are not ordered ascending.

        The rank is only recorded when the entry's score is strictly above the threshold.

        Args:
            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
            rank_stats (RankStats): RankStats class instance

        Returns:
            VariantPrioritisationResult: The recorded result, or None when the score does not
            exceed the threshold
        """
        cutoff = float(self.threshold)
        if not (cutoff < float(result_entry.score)):
            return None
        return self._record_variant_prioritisation_match(result_entry, rank_stats)

    def _record_matched_variant(
        self, rank_stats: RankStats, standardised_variant_result: RankedPhEvalVariantResult
    ) -> VariantPrioritisationResult:
        """
        Record a matched result, applying the threshold when one is configured.

        A threshold equal to 0.0 means the rank is recorded without looking at the score.
        Any other threshold delegates to the comparison that fits the score order.

        Args:
            rank_stats (RankStats): RankStats class instance
            standardised_variant_result (RankedPhEvalVariantResult): Result entry that matched

        Returns:
            VariantPrioritisationResult: The recorded result, or None when a threshold is set
            and the score does not pass it
        """
        if float(self.threshold) == 0.0:
            return self._record_variant_prioritisation_match(
                standardised_variant_result, rank_stats
            )
        # Only the exact string "ascending" selects the below-threshold comparison.
        assess = (
            self._assess_variant_with_threshold
            if self.score_order != "ascending"
            else self._assess_variant_with_threshold_ascending_order
        )
        return assess(standardised_variant_result, rank_stats)

    def assess_variant_prioritisation(
        self,
        rank_stats: RankStats,
        rank_records: defaultdict,
        binary_classification_stats: BinaryClassificationStats,
    ) -> None:
        """
        Assess every causative variant against the ranked results.

        For each causative variant the total in rank_stats is incremented, the results are
        scanned in order for the first equal variant, and a PrioritisationRankRecorder records
        the outcome. A match contributes its rank to the relevant ranks, or 0 when the match
        did not pass the threshold; a variant absent from the results contributes nothing.
        The collected ranks are finally handed to rank_stats and binary_classification_stats.

        Args:
            rank_stats (RankStats): RankStats class instance
            rank_records (defaultdict): A defaultdict to store the correct ranked results.
            binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats
                class instance.
        """
        relevant_ranks = []
        for causative_variant in self.proband_causative_variants:
            rank_stats.total += 1
            # Start from a result without a rank; it is kept if no entry matches.
            outcome = VariantPrioritisationResult(self.phenopacket_path, causative_variant)
            for entry in self.standardised_variant_results:
                entry_variant = GenomicVariant(
                    chrom=entry.chromosome,
                    pos=entry.start,
                    ref=entry.ref,
                    alt=entry.alt,
                )
                if causative_variant == entry_variant:
                    outcome = self._record_matched_variant(rank_stats, entry)
                    relevant_ranks.append(outcome.rank if outcome else 0)
                    # Only the first matching entry is considered.
                    break
            recorded = outcome
            if recorded is None:
                # The match failed the threshold, so record the variant without a rank.
                recorded = VariantPrioritisationResult(self.phenopacket_path, causative_variant)
            recorder = PrioritisationRankRecorder(
                rank_stats.total,
                self.results_dir,
                recorded,
                rank_records,
            )
            recorder.record_rank()
        rank_stats.relevant_result_ranks.append(relevant_ranks)
        binary_classification_stats.add_classification(
            self.standardised_variant_results, relevant_ranks
        )

__init__(phenopacket_path, results_dir, standardised_variant_results, threshold, score_order, proband_causative_variants)

Initialise AssessVariantPrioritisation class

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the phenopacket file

required
results_dir Path

Path to the results directory

required
standardised_variant_results List[RankedPhEvalVariantResult]

List of ranked PhEval variant results

required
threshold float

Threshold for scores

required
score_order str

Score order for results, either ascending or descending

required
proband_causative_variants List[GenomicVariant]

List of the proband's known causative variants

required
Source code in src/pheval/analyse/variant_prioritisation_analysis.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    phenopacket_path: Path,
    results_dir: Path,
    standardised_variant_results: List[RankedPhEvalVariantResult],
    threshold: float,
    score_order: str,
    proband_causative_variants: List[GenomicVariant],
):
    """
    Store everything needed to assess a single phenopacket.

    Args:
        phenopacket_path (Path): Path to the phenopacket file
        results_dir (Path): Path to the results directory
        standardised_variant_results (List[RankedPhEvalVariantResult]): Ranked PhEval variant
            results to search for the causative variants
        threshold (float): Threshold applied to result scores
        score_order (str): Score order for results, either ascending or descending
        proband_causative_variants (List[GenomicVariant]): Causative variants to look for

    """
    # What is being assessed.
    self.phenopacket_path = phenopacket_path
    self.proband_causative_variants = proband_causative_variants
    # What it is assessed against, and the results directory.
    self.results_dir = results_dir
    self.standardised_variant_results = standardised_variant_results
    # How scores are interpreted.
    self.threshold = threshold
    self.score_order = score_order

assess_variant_prioritisation(rank_stats, rank_records, binary_classification_stats)

Assess variant prioritisation.

This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder.

Parameters:

Name Type Description Default
rank_stats RankStats

RankStats class instance

required
rank_records defaultdict

A defaultdict to store the correct ranked results.

required
binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/variant_prioritisation_analysis.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def assess_variant_prioritisation(
    self,
    rank_stats: RankStats,
    rank_records: defaultdict,
    binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess every causative variant against the ranked results.

    For each causative variant the total in rank_stats is incremented, the results are
    scanned in order for the first equal variant, and a PrioritisationRankRecorder records
    the outcome. A match contributes its rank to the relevant ranks, or 0 when the matched
    result came back falsy; a variant absent from the results contributes nothing.
    The collected ranks are finally handed to rank_stats and binary_classification_stats.

    Args:
        rank_stats (RankStats): RankStats class instance
        rank_records (defaultdict): A defaultdict to store the correct ranked results.
        binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats
            class instance.
    """
    relevant_ranks = []
    for causative_variant in self.proband_causative_variants:
        rank_stats.total += 1
        # Start from a result without a rank; it is kept if no entry matches.
        outcome = VariantPrioritisationResult(self.phenopacket_path, causative_variant)
        for entry in self.standardised_variant_results:
            entry_variant = GenomicVariant(
                chrom=entry.chromosome,
                pos=entry.start,
                ref=entry.ref,
                alt=entry.alt,
            )
            if causative_variant == entry_variant:
                outcome = self._record_matched_variant(rank_stats, entry)
                relevant_ranks.append(outcome.rank if outcome else 0)
                # Only the first matching entry is considered.
                break
        recorded = outcome
        if recorded is None:
            # No usable match came back, so record the variant without a rank.
            recorded = VariantPrioritisationResult(self.phenopacket_path, causative_variant)
        recorder = PrioritisationRankRecorder(
            rank_stats.total,
            self.results_dir,
            recorded,
            rank_records,
        )
        recorder.record_rank()
    rank_stats.relevant_result_ranks.append(relevant_ranks)
    binary_classification_stats.add_classification(
        self.standardised_variant_results, relevant_ranks
    )

assess_phenopacket_variant_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats)

Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket.

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the Phenopacket.

required
score_order str

The order in which scores are arranged, either ascending or descending.

required
results_dir_and_input TrackInputOutputDirectories

Input and output directories.

required
threshold float

Threshold for assessment.

required
variant_rank_stats RankStats

RankStats class instance.

required
variant_rank_comparison defaultdict

Default dictionary for variant rank comparisons.

required
variant_binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/variant_prioritisation_analysis.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def assess_phenopacket_variant_prioritisation(
    phenopacket_path: Path,
    score_order: str,
    results_dir_and_input: TrackInputOutputDirectories,
    threshold: float,
    variant_rank_stats: RankStats,
    variant_rank_comparison: defaultdict,
    variant_binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess variant prioritisation for one Phenopacket.

    The causative variants recorded for the proband are compared with the standardised
    PhEval variant result read from
    ``<results_dir>/pheval_variant_results/<phenopacket stem>-pheval_variant_result.tsv``.

    Args:
        phenopacket_path (Path): Path to the Phenopacket.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
        threshold (float): Threshold for assessment.
        variant_rank_stats (RankStats): RankStats class instance.
        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
        variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats
            class instance.
    """
    causative_variants = _obtain_causative_variants(phenopacket_path)
    # The result file is named after the phenopacket's stem.
    result_file = results_dir_and_input.results_dir.joinpath(
        f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
    )
    raw_results = read_standardised_result(result_file)
    variant_results_dir = results_dir_and_input.results_dir.joinpath("pheval_variant_results/")
    ranked_results = parse_pheval_result(RankedPhEvalVariantResult, raw_results)
    assessor = AssessVariantPrioritisation(
        phenopacket_path=phenopacket_path,
        results_dir=variant_results_dir,
        standardised_variant_results=ranked_results,
        threshold=threshold,
        score_order=score_order,
        proband_causative_variants=causative_variants,
    )
    assessor.assess_variant_prioritisation(
        variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats
    )

benchmark_variant_prioritisation(results_directory_and_input, score_order, threshold, variant_rank_comparison)

Benchmark a directory based on variant prioritisation results.

Parameters:

Name Type Description Default
results_directory_and_input TrackInputOutputDirectories

Input and output directories.

required
score_order str

The order in which scores are arranged, either ascending or descending.

required
threshold float

Threshold for assessment.

required
variant_rank_comparison defaultdict

Default dictionary for variant rank comparisons.

required

Returns:

Name Type Description
BenchmarkRunResults

An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory.

Source code in src/pheval/analyse/variant_prioritisation_analysis.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def benchmark_variant_prioritisation(
    results_directory_and_input: TrackInputOutputDirectories,
    score_order: str,
    threshold: float,
    variant_rank_comparison: defaultdict,
) -> BenchmarkRunResults:
    """
    Benchmark a directory based on variant prioritisation results.

    Every file found in the phenopacket directory is assessed in turn, with all
    assessments sharing one RankStats and one BinaryClassificationStats instance
    created for this run.

    Args:
        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        threshold (float): Threshold for assessment.
        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons;
            it is passed to every assessment and returned as the ranks of the run.

    Returns:
        BenchmarkRunResults: An object containing benchmarking results for variant prioritisation,
        including ranks and rank statistics for the benchmarked directory.
    """
    # Fresh accumulators, so statistics cover only this results directory.
    variant_rank_stats = RankStats()
    variant_binary_classification_stats = BinaryClassificationStats()
    for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
        assess_phenopacket_variant_prioritisation(
            phenopacket_path,
            score_order,
            results_directory_and_input,
            threshold,
            variant_rank_stats,
            variant_rank_comparison,
            variant_binary_classification_stats,
        )
    return BenchmarkRunResults(
        results_dir=results_directory_and_input.results_dir,
        ranks=variant_rank_comparison,
        rank_stats=variant_rank_stats,
        binary_classification_stats=variant_binary_classification_stats,
    )