Skip to content

Gene prioritisation analysis

AssessGenePrioritisation

Class for assessing gene prioritisation based on thresholds and scoring orders.

Source code in src/pheval/analyse/gene_prioritisation_analysis.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
class AssessGenePrioritisation:
    """
    Class for assessing gene prioritisation based on thresholds and scoring orders.

    For every causative gene of a proband, the ranked gene results are searched for an
    entry with the same gene identifier or gene symbol. The rank of that entry is
    recorded, subject to an optional score threshold.
    """

    def __init__(
        self,
        phenopacket_path: Path,
        results_dir: Path,
        standardised_gene_results: List[RankedPhEvalGeneResult],
        threshold: float,
        score_order: str,
        proband_causative_genes: List[ProbandCausativeGene],
    ):
        """
        Initialise AssessGenePrioritisation class.

        Args:
            phenopacket_path (Path): Path to the phenopacket file
            results_dir (Path): Path to the results directory
            standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results
            threshold (float): Threshold for scores; a value equal to 0.0 disables thresholding
            score_order (str): Score order for results, either ascending or descending
            proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes
        """
        self.phenopacket_path = phenopacket_path
        self.results_dir = results_dir
        self.standardised_gene_results = standardised_gene_results
        self.threshold = threshold
        self.score_order = score_order
        self.proband_causative_genes = proband_causative_genes

    def _record_gene_prioritisation_match(
        self,
        gene: ProbandCausativeGene,
        result_entry: RankedPhEvalGeneResult,
        rank_stats: RankStats,
    ) -> GenePrioritisationResult:
        """
        Record the gene prioritisation rank if found within the results.

        The rank of the result entry is added to ``rank_stats`` and wrapped in a
        GenePrioritisationResult for this phenopacket and gene symbol.

        Args:
            gene (ProbandCausativeGene): Diagnosed proband gene
            result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry
            rank_stats (RankStats): RankStats class instance

        Returns:
            GenePrioritisationResult: Recorded correct gene prioritisation rank result
        """
        rank = result_entry.rank
        rank_stats.add_rank(rank)
        return GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol, rank)

    def _assess_gene_with_threshold_ascending_order(
        self,
        result_entry: RankedPhEvalGeneResult,
        gene: ProbandCausativeGene,
        rank_stats: RankStats,
    ) -> GenePrioritisationResult:
        """
        Record the gene prioritisation rank if it meets the ascending order threshold.

        The rank is recorded only when the score of the result entry is strictly less
        than the threshold. Both values are converted with ``float`` before comparing.

        Args:
            result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry
            gene (ProbandCausativeGene): Diagnosed proband gene
            rank_stats (RankStats): RankStats class instance

        Returns:
            GenePrioritisationResult: Recorded correct gene prioritisation rank result,
            or None when the score does not pass the threshold
        """
        if float(self.threshold) > float(result_entry.score):
            return self._record_gene_prioritisation_match(gene, result_entry, rank_stats)
        return None

    def _assess_gene_with_threshold(
        self,
        result_entry: RankedPhEvalGeneResult,
        gene: ProbandCausativeGene,
        rank_stats: RankStats,
    ) -> GenePrioritisationResult:
        """
        Record the gene prioritisation rank if it meets the score threshold.

        The rank is recorded only when the score of the result entry is strictly greater
        than the threshold. Both values are converted with ``float`` before comparing.

        Args:
            result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry
            gene (ProbandCausativeGene): Diagnosed proband gene
            rank_stats (RankStats): RankStats class instance

        Returns:
            GenePrioritisationResult: Recorded correct gene prioritisation rank result,
            or None when the score does not pass the threshold
        """
        if float(self.threshold) < float(result_entry.score):
            return self._record_gene_prioritisation_match(gene, result_entry, rank_stats)
        return None

    def _record_matched_gene(
        self,
        gene: ProbandCausativeGene,
        rank_stats: RankStats,
        standardised_gene_result: RankedPhEvalGeneResult,
    ) -> GenePrioritisationResult:
        """
        Return the gene rank result - handling the specification of a threshold.

        If the threshold equals 0.0 the rank is recorded directly. Otherwise the score is
        checked against the threshold: with the ascending-order check when the score order
        is exactly "ascending", and with the greater-than check for any other value.

        Args:
            gene (ProbandCausativeGene): Diagnosed proband gene
            rank_stats (RankStats): RankStats class instance
            standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry

        Returns:
            GenePrioritisationResult: Recorded correct gene prioritisation rank result,
            or None when a threshold is set and the score does not pass it
        """
        if float(self.threshold) == 0.0:
            return self._record_gene_prioritisation_match(
                gene, standardised_gene_result, rank_stats
            )
        if self.score_order == "ascending":
            return self._assess_gene_with_threshold_ascending_order(
                standardised_gene_result, gene, rank_stats
            )
        return self._assess_gene_with_threshold(standardised_gene_result, gene, rank_stats)

    def assess_gene_prioritisation(
        self,
        rank_stats: RankStats,
        rank_records: defaultdict,
        binary_classification_stats: BinaryClassificationStats,
    ) -> None:
        """
        Assess gene prioritisation.

        For each causative gene, ``rank_stats.total`` is incremented and the first result
        entry with a matching gene identifier or gene symbol is assessed. One record per
        gene is written through a PrioritisationRankRecorder. The ranks collected for this
        phenopacket are then appended to ``rank_stats.relevant_result_ranks`` and passed to
        ``binary_classification_stats.add_classification``.

        Args:
            rank_stats (RankStats): RankStats class instance
            rank_records (defaultdict): A defaultdict to store the correct ranked results.
            binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
        """
        relevant_ranks = []
        for gene in self.proband_causative_genes:
            rank_stats.total += 1
            gene_match = None
            for standardised_gene_result in self.standardised_gene_results:
                if (
                    gene.gene_identifier == standardised_gene_result.gene_identifier
                    or gene.gene_symbol == standardised_gene_result.gene_symbol
                ):
                    gene_match = self._record_matched_gene(
                        gene, rank_stats, standardised_gene_result
                    )
                    # A matched entry that fails the threshold contributes rank 0.
                    # NOTE(review): a causative gene that is absent from the results adds
                    # nothing to relevant_ranks, whereas one that is present but fails the
                    # threshold adds 0 - confirm this asymmetry is intended.
                    relevant_ranks.append(gene_match.rank if gene_match is not None else 0)
                    # Only the first matching entry is considered.
                    break
            if gene_match is None:
                # Not found, or rejected by the threshold: record a result without a rank
                # (whatever GenePrioritisationResult uses as its default).
                gene_match = GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol)
            PrioritisationRankRecorder(
                rank_stats.total,
                self.results_dir,
                gene_match,
                rank_records,
            ).record_rank()
        rank_stats.relevant_result_ranks.append(relevant_ranks)
        binary_classification_stats.add_classification(
            pheval_results=self.standardised_gene_results, relevant_ranks=relevant_ranks
        )

__init__(phenopacket_path, results_dir, standardised_gene_results, threshold, score_order, proband_causative_genes)

Initialise AssessGenePrioritisation class.

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the phenopacket file

required
results_dir Path

Path to the results directory

required
standardised_gene_results List[RankedPhEvalGeneResult]

List of ranked PhEval gene results

required
threshold float

Threshold for scores

required
score_order str

Score order for results, either ascending or descending

required
proband_causative_genes List[ProbandCausativeGene]

List of proband causative genes

required
Source code in src/pheval/analyse/gene_prioritisation_analysis.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def __init__(
    self,
    phenopacket_path: Path,
    results_dir: Path,
    standardised_gene_results: List[RankedPhEvalGeneResult],
    threshold: float,
    score_order: str,
    proband_causative_genes: List[ProbandCausativeGene],
):
    """
    Store the inputs needed to assess gene prioritisation for one phenopacket.

    Args:
        phenopacket_path (Path): Path to the phenopacket file
        results_dir (Path): Path to the results directory
        standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results
        threshold (float): Threshold for scores
        score_order (str): Score order for results, either ascending or descending
        proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes
    """
    # Locations of the phenopacket and of the results directory.
    self.phenopacket_path, self.results_dir = phenopacket_path, results_dir
    # Scoring configuration.
    self.threshold, self.score_order = threshold, score_order
    # Data being compared: ranked results versus the proband's causative genes.
    self.standardised_gene_results = standardised_gene_results
    self.proband_causative_genes = proband_causative_genes

assess_gene_prioritisation(rank_stats, rank_records, binary_classification_stats)

Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder.

Parameters:

Name Type Description Default
rank_stats RankStats

RankStats class instance

required
rank_records defaultdict

A defaultdict to store the correct ranked results.

required
binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/gene_prioritisation_analysis.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def assess_gene_prioritisation(
    self,
    rank_stats: RankStats,
    rank_records: defaultdict,
    binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess gene prioritisation.

    For each causative gene, ``rank_stats.total`` is incremented and the first result
    entry with a matching gene identifier or gene symbol is assessed. One record per
    gene is written through a PrioritisationRankRecorder. The ranks collected for this
    phenopacket are then appended to ``rank_stats.relevant_result_ranks`` and passed to
    ``binary_classification_stats.add_classification``.

    Args:
        rank_stats (RankStats): RankStats class instance
        rank_records (defaultdict): A defaultdict to store the correct ranked results.
        binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    relevant_ranks = []
    for gene in self.proband_causative_genes:
        rank_stats.total += 1
        gene_match = None
        for standardised_gene_result in self.standardised_gene_results:
            if (
                gene.gene_identifier == standardised_gene_result.gene_identifier
                or gene.gene_symbol == standardised_gene_result.gene_symbol
            ):
                gene_match = self._record_matched_gene(
                    gene, rank_stats, standardised_gene_result
                )
                # A matched entry for which no result was recorded contributes rank 0.
                # NOTE(review): a causative gene that is absent from the results adds
                # nothing to relevant_ranks, whereas a matched gene with no recorded
                # result adds 0 - confirm this asymmetry is intended.
                relevant_ranks.append(gene_match.rank if gene_match is not None else 0)
                # Only the first matching entry is considered.
                break
        if gene_match is None:
            # Nothing was recorded for this gene: use a result without a rank
            # (whatever GenePrioritisationResult uses as its default).
            gene_match = GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol)
        PrioritisationRankRecorder(
            rank_stats.total,
            self.results_dir,
            gene_match,
            rank_records,
        ).record_rank()
    rank_stats.relevant_result_ranks.append(relevant_ranks)
    binary_classification_stats.add_classification(
        pheval_results=self.standardised_gene_results, relevant_ranks=relevant_ranks
    )

assess_phenopacket_gene_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, gene_rank_stats, gene_rank_comparison, gene_binary_classification_stats)

Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket.

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the Phenopacket.

required
score_order str

The order in which scores are arranged, either ascending or descending.

required
results_dir_and_input TrackInputOutputDirectories

Input and output directories.

required
threshold float

Threshold for assessment.

required
gene_rank_stats RankStats

RankStats class instance.

required
gene_rank_comparison defaultdict

Default dictionary for gene rank comparisons.

required
gene_binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/gene_prioritisation_analysis.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def assess_phenopacket_gene_prioritisation(
    phenopacket_path: Path,
    score_order: str,
    results_dir_and_input: TrackInputOutputDirectories,
    threshold: float,
    gene_rank_stats: RankStats,
    gene_rank_comparison: defaultdict,
    gene_binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess gene prioritisation for a single Phenopacket.

    The standardised gene result file for the Phenopacket is read from
    ``pheval_gene_results/<phenopacket stem>-pheval_gene_result.tsv`` under the results
    directory, parsed into ranked gene results and compared against the causative genes
    obtained for the Phenopacket. The supplied statistics objects are passed on to the
    assessment unchanged.

    Args:
        phenopacket_path (Path): Path to the Phenopacket.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
        threshold (float): Threshold for assessment.
        gene_rank_stats (RankStats): RankStats class instance.
        gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
        gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    results_dir = results_dir_and_input.results_dir
    result_file = results_dir.joinpath(
        f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
    )
    raw_gene_result = read_standardised_result(result_file)
    causative_genes = _obtain_causative_genes(phenopacket_path)
    assessor = AssessGenePrioritisation(
        phenopacket_path,
        results_dir.joinpath("pheval_gene_results/"),
        parse_pheval_result(RankedPhEvalGeneResult, raw_gene_result),
        threshold,
        score_order,
        causative_genes,
    )
    assessor.assess_gene_prioritisation(
        gene_rank_stats, gene_rank_comparison, gene_binary_classification_stats
    )

benchmark_gene_prioritisation(results_directory_and_input, score_order, threshold, gene_rank_comparison)

Benchmark a directory based on gene prioritisation results.

Parameters:

- results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
- score_order (str): The order in which scores are arranged.
- threshold (float): Threshold for assessment.
- gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.

Returns:

- BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory.

Source code in src/pheval/analyse/gene_prioritisation_analysis.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
def benchmark_gene_prioritisation(
    results_directory_and_input: TrackInputOutputDirectories,
    score_order: str,
    threshold: float,
    gene_rank_comparison: defaultdict,
) -> BenchmarkRunResults:
    """
    Benchmark a directory based on gene prioritisation results.

    Fresh RankStats and BinaryClassificationStats instances are created and every file
    yielded by ``all_files`` for the phenopacket directory is assessed against them.

    Args:
        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
        score_order (str): The order in which scores are arranged.
        threshold (float): Threshold for assessment.
        gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.

    Returns:
        BenchmarkRunResults: An object containing benchmarking results for gene prioritisation,
        including ranks and rank statistics for the benchmarked directory.
    """
    rank_stats = RankStats()
    classification_stats = BinaryClassificationStats()
    phenopacket_dir = results_directory_and_input.phenopacket_dir
    for phenopacket in all_files(phenopacket_dir):
        assess_phenopacket_gene_prioritisation(
            phenopacket_path=phenopacket,
            score_order=score_order,
            results_dir_and_input=results_directory_and_input,
            threshold=threshold,
            gene_rank_stats=rank_stats,
            gene_rank_comparison=gene_rank_comparison,
            gene_binary_classification_stats=classification_stats,
        )
    return BenchmarkRunResults(
        results_dir=results_directory_and_input.results_dir,
        ranks=gene_rank_comparison,
        rank_stats=rank_stats,
        binary_classification_stats=classification_stats,
    )