Skip to content

Disease prioritisation analysis

AssessDiseasePrioritisation

Class for assessing disease prioritisation based on thresholds and scoring orders.

Source code in src/pheval/analyse/disease_prioritisation_analysis.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class AssessDiseasePrioritisation:
    """Assess where a proband's known diseases rank within PhEval disease results."""

    def __init__(
        self,
        phenopacket_path: Path,
        results_dir: Path,
        standardised_disease_results: List[RankedPhEvalDiseaseResult],
        threshold: float,
        score_order: str,
        proband_diseases: List[ProbandDisease],
    ):
        """
        Store the inputs needed to assess a single phenopacket.

        Args:
            phenopacket_path (Path): Path to the phenopacket file
            results_dir (Path): Path to the results directory
            standardised_disease_results (List[RankedPhEvalDiseaseResult]): Ranked PhEval disease results
                that are searched for the proband diseases
            threshold (float): Score threshold; a value of 0.0 means no score filtering is applied
            score_order (str): "ascending" selects the lower-than-threshold check,
                any other value selects the greater-than-threshold check
            proband_diseases (List[ProbandDisease]): Known diseases of the proband

        """
        # What is being assessed.
        self.phenopacket_path = phenopacket_path
        self.proband_diseases = proband_diseases
        self.standardised_disease_results = standardised_disease_results
        # How it is assessed and where it is recorded.
        self.threshold = threshold
        self.score_order = score_order
        self.results_dir = results_dir

    def _record_disease_prioritisation_match(
        self,
        disease: ProbandDisease,
        result_entry: RankedPhEvalDiseaseResult,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Register the rank of a matched result entry and wrap it in a result object.

        Args:
            disease (ProbandDisease): Diagnosed proband disease
            result_entry (RankedPhEvalDiseaseResult): Result entry that matched the disease
            rank_stats (RankStats): Statistics object that receives the rank through ``add_rank``

        Returns:
            DiseasePrioritisationResult: Result holding the phenopacket path, the disease and its rank
        """
        matched_rank = result_entry.rank
        rank_stats.add_rank(matched_rank)
        return DiseasePrioritisationResult(self.phenopacket_path, disease, matched_rank)

    def _assess_disease_with_threshold_ascending_order(
        self,
        result_entry: RankedPhEvalDiseaseResult,
        disease: ProbandDisease,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Record the rank only when the entry's score is strictly below the threshold.

        Used when scores are ordered ascending. Both the threshold and the score
        are converted with ``float`` before they are compared.

        Args:
            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance

        Returns:
            DiseasePrioritisationResult: The recorded result, or None when the score
            is not below the threshold
        """
        cutoff = float(self.threshold)
        score = float(result_entry.score)
        if score < cutoff:
            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
        return None

    def _assess_disease_with_threshold(
        self,
        result_entry: RankedPhEvalDiseaseResult,
        disease: ProbandDisease,
        rank_stats: RankStats,
    ) -> DiseasePrioritisationResult:
        """
        Record the rank only when the entry's score is strictly above the threshold.

        Used when scores are not ordered ascending. Both the threshold and the score
        are converted with ``float`` before they are compared.

        Args:
            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance

        Returns:
            DiseasePrioritisationResult: The recorded result, or None when the score
            is not above the threshold
        """
        cutoff = float(self.threshold)
        score = float(result_entry.score)
        if score > cutoff:
            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
        return None

    def _record_matched_disease(
        self,
        disease: ProbandDisease,
        rank_stats: RankStats,
        standardised_disease_result: RankedPhEvalDiseaseResult,
    ) -> DiseasePrioritisationResult:
        """
        Turn a matched result entry into a rank result, applying the threshold if one is set.

        A threshold of 0.0 records the rank unconditionally. Any other threshold
        delegates to the ascending or the default threshold check, chosen by
        ``score_order``.

        Args:
            disease (ProbandDisease): Diagnosed proband disease
            rank_stats (RankStats): RankStats class instance
            standardised_disease_result (RankedPhEvalDiseaseResult): Result entry that matched the disease

        Returns:
            DiseasePrioritisationResult: The recorded result, or None when a threshold
            check rejects the entry
        """
        threshold_disabled = float(self.threshold) == 0.0
        if threshold_disabled:
            return self._record_disease_prioritisation_match(
                disease, standardised_disease_result, rank_stats
            )
        if self.score_order == "ascending":
            return self._assess_disease_with_threshold_ascending_order(
                standardised_disease_result, disease, rank_stats
            )
        return self._assess_disease_with_threshold(
            standardised_disease_result, disease, rank_stats
        )

    def assess_disease_prioritisation(
        self,
        rank_stats: RankStats,
        rank_records: defaultdict,
        binary_classification_stats: BinaryClassificationStats,
    ) -> None:
        """
        Assess every proband disease against the ranked results and record the outcome.

        For each proband disease ``rank_stats.total`` is incremented and the first
        result entry with an equal disease identifier or an equal disease name is
        taken as the match. The outcome is handed to a PrioritisationRankRecorder.
        Once all diseases are processed, the collected ranks are appended to
        ``rank_stats.relevant_result_ranks`` and passed, together with the full
        result list, to ``binary_classification_stats.add_classification``.

        Args:
            rank_stats (RankStats): RankStats class instance
            rank_records (defaultdict): A defaultdict to store the correct ranked results.
            binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
        """
        collected_ranks = []
        for proband_disease in self.proband_diseases:
            rank_stats.total += 1
            # Default outcome: a result without a rank, kept if no entry matches.
            outcome = DiseasePrioritisationResult(self.phenopacket_path, proband_disease)
            for candidate in self.standardised_disease_results:
                same_identifier = proband_disease.disease_identifier == candidate.disease_identifier
                if not (same_identifier or proband_disease.disease_name == candidate.disease_name):
                    continue
                outcome = self._record_matched_disease(proband_disease, rank_stats, candidate)
                # A match rejected by the threshold yields no outcome and counts as rank 0.
                collected_ranks.append(outcome.rank if outcome else 0)
                break  # only the first matching entry is considered
            if outcome is None:
                outcome = DiseasePrioritisationResult(self.phenopacket_path, proband_disease)
            recorder = PrioritisationRankRecorder(
                rank_stats.total,
                self.results_dir,
                outcome,
                rank_records,
            )
            recorder.record_rank()
        rank_stats.relevant_result_ranks.append(collected_ranks)
        binary_classification_stats.add_classification(
            self.standardised_disease_results, collected_ranks
        )

__init__(phenopacket_path, results_dir, standardised_disease_results, threshold, score_order, proband_diseases)

Initialise AssessDiseasePrioritisation class

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the phenopacket file

required
results_dir Path

Path to the results directory

required
standardised_disease_results List[RankedPhEvalDiseaseResult]

List of ranked PhEval disease results

required
threshold float

Threshold for scores

required
score_order str

Score order for results, either ascending or descending

required
proband_diseases List[ProbandDisease]

List of proband diseases

required
Source code in src/pheval/analyse/disease_prioritisation_analysis.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    phenopacket_path: Path,
    results_dir: Path,
    standardised_disease_results: List[RankedPhEvalDiseaseResult],
    threshold: float,
    score_order: str,
    proband_diseases: List[ProbandDisease],
):
    """
    Store the inputs needed to assess a single phenopacket.

    Args:
        phenopacket_path (Path): Path to the phenopacket file
        results_dir (Path): Path to the results directory
        standardised_disease_results (List[RankedPhEvalDiseaseResult]): Ranked PhEval disease results
            that are searched for the proband diseases
        threshold (float): Score threshold applied to matched results
        score_order (str): Score order for results, either ascending or descending
        proband_diseases (List[ProbandDisease]): Known diseases of the proband

    """
    # What is being assessed.
    self.phenopacket_path = phenopacket_path
    self.proband_diseases = proband_diseases
    self.standardised_disease_results = standardised_disease_results
    # How it is assessed and where it is recorded.
    self.threshold = threshold
    self.score_order = score_order
    self.results_dir = results_dir

assess_disease_prioritisation(rank_stats, rank_records, binary_classification_stats)

Assess disease prioritisation.

This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder.

Parameters:

Name Type Description Default
rank_stats RankStats

RankStats class instance

required
rank_records defaultdict

A defaultdict to store the correct ranked results.

required
binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/disease_prioritisation_analysis.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def assess_disease_prioritisation(
    self,
    rank_stats: RankStats,
    rank_records: defaultdict,
    binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess every proband disease against the ranked results and record the outcome.

    For each proband disease ``rank_stats.total`` is incremented and the first
    result entry with an equal disease identifier or an equal disease name is
    taken as the match. The outcome is handed to a PrioritisationRankRecorder.
    Once all diseases are processed, the collected ranks are appended to
    ``rank_stats.relevant_result_ranks`` and passed, together with the full
    result list, to ``binary_classification_stats.add_classification``.

    Args:
        rank_stats (RankStats): RankStats class instance
        rank_records (defaultdict): A defaultdict to store the correct ranked results.
        binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    collected_ranks = []
    for proband_disease in self.proband_diseases:
        rank_stats.total += 1
        # Default outcome: a result without a rank, kept if no entry matches.
        outcome = DiseasePrioritisationResult(self.phenopacket_path, proband_disease)
        for candidate in self.standardised_disease_results:
            same_identifier = proband_disease.disease_identifier == candidate.disease_identifier
            if not (same_identifier or proband_disease.disease_name == candidate.disease_name):
                continue
            outcome = self._record_matched_disease(proband_disease, rank_stats, candidate)
            # A match for which no result was returned counts as rank 0.
            collected_ranks.append(outcome.rank if outcome else 0)
            break  # only the first matching entry is considered
        if outcome is None:
            outcome = DiseasePrioritisationResult(self.phenopacket_path, proband_disease)
        recorder = PrioritisationRankRecorder(
            rank_stats.total,
            self.results_dir,
            outcome,
            rank_records,
        )
        recorder.record_rank()
    rank_stats.relevant_result_ranks.append(collected_ranks)
    binary_classification_stats.add_classification(
        self.standardised_disease_results, collected_ranks
    )

assess_phenopacket_disease_prioritisation(phenopacket_path, score_order, results_dir_and_input, threshold, disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats)

Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket.

Parameters:

Name Type Description Default
phenopacket_path Path

Path to the Phenopacket.

required
score_order str

The order in which scores are arranged, either ascending or descending.

required
results_dir_and_input TrackInputOutputDirectories

Input and output directories.

required
threshold float

Threshold for assessment.

required
disease_rank_stats RankStats

RankStats class instance.

required
disease_rank_comparison defaultdict

Default dictionary for disease rank comparisons.

required
disease_binary_classification_stats BinaryClassificationStats

BinaryClassificationStats class instance.

required
Source code in src/pheval/analyse/disease_prioritisation_analysis.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def assess_phenopacket_disease_prioritisation(
    phenopacket_path: Path,
    score_order: str,
    results_dir_and_input: TrackInputOutputDirectories,
    threshold: float,
    disease_rank_stats: RankStats,
    disease_rank_comparison: defaultdict,
    disease_binary_classification_stats: BinaryClassificationStats,
) -> None:
    """
    Assess disease prioritisation for one Phenopacket.

    The standardised disease result file belonging to the Phenopacket is read from
    the ``pheval_disease_results`` sub-directory of the results directory and
    compared against the causative diseases obtained from the Phenopacket.

    Args:
        phenopacket_path (Path): Path to the Phenopacket.
        score_order (str): The order in which scores are arranged, either ascending or descending.
        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
        threshold (float): Threshold for assessment.
        disease_rank_stats (RankStats): RankStats class instance.
        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
        disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
    """
    # The result file is named after the Phenopacket's file stem.
    result_file = results_dir_and_input.results_dir.joinpath(
        f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
    )
    raw_results = read_standardised_result(result_file)
    causative_diseases = _obtain_causative_diseases(phenopacket_path)
    disease_results_dir = results_dir_and_input.results_dir.joinpath("pheval_disease_results/")
    ranked_results = parse_pheval_result(RankedPhEvalDiseaseResult, raw_results)
    assessor = AssessDiseasePrioritisation(
        phenopacket_path,
        disease_results_dir,
        ranked_results,
        threshold,
        score_order,
        causative_diseases,
    )
    assessor.assess_disease_prioritisation(
        disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats
    )

benchmark_disease_prioritisation(results_directory_and_input, score_order, threshold, disease_rank_comparison)

Benchmark a directory based on disease prioritisation results.

Parameters:

Name Type Description Default
results_directory_and_input TrackInputOutputDirectories

Input and output directories.

required
score_order str

The order in which scores are arranged, either ascending or descending.

required
threshold float

Threshold for assessment.

required
disease_rank_comparison defaultdict

Default dictionary for disease rank comparisons.

required

Returns:

Name Type Description
BenchmarkRunResults

An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory.

Source code in src/pheval/analyse/disease_prioritisation_analysis.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def benchmark_disease_prioritisation(
    results_directory_and_input: TrackInputOutputDirectories,
    score_order: str,
    threshold: float,
    disease_rank_comparison: defaultdict,
):
    """
    Benchmark disease prioritisation for every Phenopacket of a run.

    Each file found in the Phenopacket directory is assessed in turn, with all
    assessments feeding the same rank statistics and binary classification
    statistics objects.

    Args:
        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
        score_order (str): The order in which scores are arranged.
        threshold (float): Threshold for assessment.
        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.

    Returns:
        BenchmarkRunResults: The results directory, the rank comparison dictionary and the
        rank and binary classification statistics gathered for the benchmarked directory.
    """
    rank_stats = RankStats()
    classification_stats = BinaryClassificationStats()
    phenopacket_paths = all_files(results_directory_and_input.phenopacket_dir)
    for phenopacket_path in phenopacket_paths:
        assess_phenopacket_disease_prioritisation(
            phenopacket_path=phenopacket_path,
            score_order=score_order,
            results_dir_and_input=results_directory_and_input,
            threshold=threshold,
            disease_rank_stats=rank_stats,
            disease_rank_comparison=disease_rank_comparison,
            disease_binary_classification_stats=classification_stats,
        )
    return BenchmarkRunResults(
        results_dir=results_directory_and_input.results_dir,
        ranks=disease_rank_comparison,
        rank_stats=rank_stats,
        binary_classification_stats=classification_stats,
    )