Rank stats

RankStats dataclass

Store statistics related to ranking.

Attributes:

    top (int): Count of top-ranked matches.
    top3 (int): Count of matches within the top 3 ranks.
    top5 (int): Count of matches within the top 5 ranks.
    top10 (int): Count of matches within the top 10 ranks.
    found (int): Count of found matches.
    total (int): Total count of matches.
    reciprocal_ranks (List[float]): List of reciprocal ranks.
    relevant_result_ranks (List[List[int]]): Nested list of ranks for the known entities for all cases in a run.
    mrr (float): Mean Reciprocal Rank (MRR). Defaults to None.
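
A minimal usage sketch (illustrative values, not from the library documentation): record the rank of the known entity for each case in a run, then summarise.

from pheval.analyse.rank_stats import RankStats

stats = RankStats(total=4)  # four cases in the run
for rank in [1, 3, 7]:      # the fourth case had no match, so no rank is added
    stats.add_rank(rank)

print(stats.top, stats.top3, stats.top10, stats.found)  # 1 2 3 3
print(stats.percentage_found())                         # 100 * 3 / 4 = 75.0
print(stats.mean_reciprocal_rank())                     # mean of [1, 1/3, 1/7, 0]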

Source code in src/pheval/analyse/rank_stats.py
@dataclass
class RankStats:
    """Store statistics related to ranking.

    Attributes:
        top (int): Count of top-ranked matches.
        top3 (int): Count of matches within the top 3 ranks.
        top5 (int): Count of matches within the top 5 ranks.
        top10 (int): Count of matches within the top 10 ranks.
        found (int): Count of found matches.
        total (int): Total count of matches.
        reciprocal_ranks (List[float]): List of reciprocal ranks.
        relevant_result_ranks (List[List[int]]): Nested list of ranks for the known entities for all cases in a run.
        mrr (float): Mean Reciprocal Rank (MRR). Defaults to None.
    """

    top: int = 0
    top3: int = 0
    top5: int = 0
    top10: int = 0
    found: int = 0
    total: int = 0
    reciprocal_ranks: List = field(default_factory=list)
    relevant_result_ranks: List[List[int]] = field(default_factory=list)
    mrr: float = None

    def add_rank(self, rank: int) -> None:
        """
        Add rank for matched result.

        Args:
            rank (int): The rank value to be added.

        Notes:
            This method updates the internal attributes of the RankStats object based on the provided rank value.
            It calculates various statistics such as the count of top ranks (1, 3, 5, and 10),
            the total number of ranks found, and the reciprocal rank.
            This function modifies the object's state by updating the internal attributes.
        """
        self.reciprocal_ranks.append(1 / rank)
        self.found += 1
        if rank == 1:
            self.top += 1
        if rank != "" and rank <= 3:
            self.top3 += 1
        if rank != "" and rank <= 5:
            self.top5 += 1
        if rank != "" and rank <= 10:
            self.top10 += 1

    def percentage_rank(self, value: int) -> float:
        """
        Calculate the percentage rank.

        Args:
            value (int): The value for which the percentage rank needs to be calculated.

        Returns:
            float: The calculated percentage rank based on the provided value and the total count.
        """
        return 100 * value / self.total

    def percentage_top(self) -> float:
        """
        Calculate the percentage of top matches.

        Returns:
            float: The percentage of top matches compared to the total count.
        """
        return self.percentage_rank(self.top)

    def percentage_top3(self) -> float:
        """
        Calculate the percentage of matches within the top 3.

        Returns:
            float: The percentage of matches within the top 3 compared to the total count.
        """
        return self.percentage_rank(self.top3)

    def percentage_top5(self) -> float:
        """
        Calculate the percentage of matches within the top 5.

        Returns:
            float: The percentage of matches within the top 5 compared to the total count.
        """
        return self.percentage_rank(self.top5)

    def percentage_top10(self) -> float:
        """
        Calculate the percentage of matches within the top 10.

        Returns:
            float: The percentage of matches within the top 10 compared to the total count.
        """
        return self.percentage_rank(self.top10)

    def percentage_found(self) -> float:
        """
        Calculate the percentage of matches found.

        Returns:
            float: The percentage of matches found compared to the total count.
        """
        return self.percentage_rank(self.found)

    @staticmethod
    def percentage_difference(percentage_value_1: float, percentage_value_2: float) -> float:
        """
        Calculate the percentage difference between two percentage values.

        Args:
            percentage_value_1 (float): The first percentage value.
            percentage_value_2 (float): The second percentage value.

        Returns:
            float: The difference between the two percentage values.
        """
        return percentage_value_1 - percentage_value_2

    def mean_reciprocal_rank(self) -> float:
        """
        Calculate the Mean Reciprocal Rank (MRR) for the stored ranks.

        The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks
        for the found cases.

        If the total number of cases differs from the number of found cases,
        this method extends the reciprocal ranks list with zeroes for missing cases.

        Returns:
            float: The calculated Mean Reciprocal Rank.
        """
        if len(self.reciprocal_ranks) != self.total:
            missing_cases = self.total - self.found
            self.reciprocal_ranks.extend([0] * missing_cases)
            return mean(self.reciprocal_ranks)
        return mean(self.reciprocal_ranks)

    def return_mean_reciprocal_rank(self) -> float:
        """
        Retrieve or calculate the Mean Reciprocal Rank (MRR).

        If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value.
        Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method.

        Returns:
            float: The Mean Reciprocal Rank value.
        """
        if self.mrr is not None:
            return self.mrr
        else:
            return self.mean_reciprocal_rank()

    def precision_at_k(self, k: int) -> float:
        """
        Calculate the precision at k.
        Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions.
        It measures the accuracy of the top-k predictions made by a model.

        Args:
            k (int): The number of top predictions to consider.

        Returns:
            float: The precision at k, ranging from 0.0 to 1.0.
            A higher precision indicates a better performance in identifying relevant items in the top-k predictions.
        """
        k_attr = getattr(self, f"top{k}") if k > 1 else self.top
        return k_attr / (self.total * k)

    @staticmethod
    def _average_precision_at_k(
        number_of_relevant_entities_at_k: int, precision_at_k: float
    ) -> float:
        """
        Calculate the Average Precision at k.

        Average Precision at k (AP@k) is a metric used to evaluate the precision of a ranked retrieval system.
        It measures the precision at each relevant position up to k and takes the average.

        Args:
            number_of_relevant_entities_at_k (int): The count of relevant entities in the top-k predictions.
            precision_at_k (float): The precision at k - the sum of the precision values at each relevant position.

        Returns:
            float: The Average Precision at k, ranging from 0.0 to 1.0.
                   A higher value indicates better precision in the top-k predictions.
        """
        return (
            (1 / number_of_relevant_entities_at_k) * precision_at_k
            if number_of_relevant_entities_at_k > 0
            else 0.0
        )

    def mean_average_precision_at_k(self, k: int) -> float:
        """
        Calculate the Mean Average Precision at k.

        Mean Average Precision at k (MAP@k) is a performance metric for ranked data.
        It calculates the average precision at k for each result rank and then takes the mean across all queries.

        Args:
            k (int): The number of top predictions to consider for precision calculation.

        Returns:
            float: The Mean Average Precision at k, ranging from 0.0 to 1.0.
                   A higher value indicates better performance in ranking relevant entities higher in the predictions.
        """
        cumulative_average_precision_scores = 0
        for result_ranks in self.relevant_result_ranks:
            precision_at_k, number_of_relevant_entities_at_k = 0, 0
            for rank in result_ranks:
                if 0 < rank <= k:
                    number_of_relevant_entities_at_k += 1
                    precision_at_k += number_of_relevant_entities_at_k / rank
                cumulative_average_precision_scores += self._average_precision_at_k(
                    number_of_relevant_entities_at_k, precision_at_k
                )
        return (1 / self.total) * cumulative_average_precision_scores

    def f_beta_score_at_k(self, percentage_at_k: float, k: int) -> float:
        """
        Calculate the F-beta score at k.

        The F-beta score is a metric that combines precision and recall,
        with beta controlling the emphasis on precision.
        The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall.
        This method computes the F-beta score at a specific percentage threshold within the top-k predictions.

        Args:
            percentage_at_k (float): The percentage of true positive predictions within the top-k.
            k (int): The number of top predictions to consider.

        Returns:
            float: The F-beta score at k, ranging from 0.0 to 1.0.
                   A higher score indicates better trade-off between precision and recall.
        """
        precision = self.precision_at_k(k)
        recall_at_k = percentage_at_k / 100
        return (
            (2 * precision * recall_at_k) / (precision + recall_at_k)
            if (precision + recall_at_k) > 0
            else 0
        )

    def mean_normalised_discounted_cumulative_gain(self, k: int) -> float:
        """
        Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff.

        NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items.

        Args:
            k (int): The rank cutoff for calculating NDCG.

        Returns:
            float: The mean NDCG score across all query results.
        """
        ndcg_scores = []
        for result_ranks in self.relevant_result_ranks:
            result_ranks = [rank for rank in result_ranks if rank <= k]
            result_ranks = [3 if i in result_ranks else 0 for i in range(k)]
            ideal_ranking = sorted(result_ranks, reverse=True)
            ndcg_scores.append(ndcg_score(np.asarray([ideal_ranking]), np.asarray([result_ranks])))
        return np.mean(ndcg_scores)

add_rank(rank)

Add rank for matched result.

Parameters:

    rank (int): The rank value to be added. (required)

Notes

This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found, and the reciprocal rank. This function modifies the object's state by updating the internal attributes.

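For example (illustrative values, not from the library documentation), a single rank of 5 only increments the top-5 and top-10 counters:

from pheval.analyse.rank_stats import RankStats

stats = RankStats(total=1)
stats.add_rank(5)
# rank 5 falls outside the top and top-3 buckets but inside top-5 and top-10
print(stats.top, stats.top3, stats.top5, stats.top10)  # 0 0 1 1
print(stats.reciprocal_ranks)                          # [0.2]
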
Source code in src/pheval/analyse/rank_stats.py
def add_rank(self, rank: int) -> None:
    """
    Add rank for matched result.

    Args:
        rank (int): The rank value to be added.

    Notes:
        This method updates the internal attributes of the RankStats object based on the provided rank value.
        It calculates various statistics such as the count of top ranks (1, 3, 5, and 10),
        the total number of ranks found, and the reciprocal rank.
        This function modifies the object's state by updating the internal attributes.
    """
    self.reciprocal_ranks.append(1 / rank)
    self.found += 1
    if rank == 1:
        self.top += 1
    if rank != "" and rank <= 3:
        self.top3 += 1
    if rank != "" and rank <= 5:
        self.top5 += 1
    if rank != "" and rank <= 10:
        self.top10 += 1

f_beta_score_at_k(percentage_at_k, k)

Calculate the F-beta score at k.

The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions.

Parameters:

    percentage_at_k (float): The percentage of true positive predictions within the top-k. (required)
    k (int): The number of top predictions to consider. (required)

Returns:

    float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall.

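A worked sketch with illustrative counts: with 6 of 10 cases ranked within the top 3, precision@3 is 6 / (10 * 3) = 0.2 and recall is 60% expressed as 0.6, so the F1 score is roughly 0.3.

from pheval.analyse.rank_stats import RankStats

stats = RankStats(top3=6, total=10)
recall_percentage = stats.percentage_top3()          # 60.0
f1 = stats.f_beta_score_at_k(recall_percentage, 3)
# 2 * 0.2 * 0.6 / (0.2 + 0.6) ≈ 0.3
print(f1)
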
Source code in src/pheval/analyse/rank_stats.py
def f_beta_score_at_k(self, percentage_at_k: float, k: int) -> float:
    """
    Calculate the F-beta score at k.

    The F-beta score is a metric that combines precision and recall,
    with beta controlling the emphasis on precision.
    The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall.
    This method computes the F-beta score at a specific percentage threshold within the top-k predictions.

    Args:
        percentage_at_k (float): The percentage of true positive predictions within the top-k.
        k (int): The number of top predictions to consider.

    Returns:
        float: The F-beta score at k, ranging from 0.0 to 1.0.
               A higher score indicates better trade-off between precision and recall.
    """
    precision = self.precision_at_k(k)
    recall_at_k = percentage_at_k / 100
    return (
        (2 * precision * recall_at_k) / (precision + recall_at_k)
        if (precision + recall_at_k) > 0
        else 0
    )

mean_average_precision_at_k(k)

Calculate the Mean Average Precision at k.

Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries.

Parameters:

    k (int): The number of top predictions to consider for precision calculation. (required)

Returns:

    float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions.

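A minimal sketch with hypothetical ranks: populate relevant_result_ranks with one list of known-entity ranks per case, then compute MAP@k (the returned value depends on the per-case ranks).

from pheval.analyse.rank_stats import RankStats

# hypothetical ranks of the known entities for two cases
stats = RankStats(total=2, relevant_result_ranks=[[1, 4], [2]])
print(stats.mean_average_precision_at_k(3))
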
Source code in src/pheval/analyse/rank_stats.py
def mean_average_precision_at_k(self, k: int) -> float:
    """
    Calculate the Mean Average Precision at k.

    Mean Average Precision at k (MAP@k) is a performance metric for ranked data.
    It calculates the average precision at k for each result rank and then takes the mean across all queries.

    Args:
        k (int): The number of top predictions to consider for precision calculation.

    Returns:
        float: The Mean Average Precision at k, ranging from 0.0 to 1.0.
               A higher value indicates better performance in ranking relevant entities higher in the predictions.
    """
    cumulative_average_precision_scores = 0
    for result_ranks in self.relevant_result_ranks:
        precision_at_k, number_of_relevant_entities_at_k = 0, 0
        for rank in result_ranks:
            if 0 < rank <= k:
                number_of_relevant_entities_at_k += 1
                precision_at_k += number_of_relevant_entities_at_k / rank
            cumulative_average_precision_scores += self._average_precision_at_k(
                number_of_relevant_entities_at_k, precision_at_k
            )
    return (1 / self.total) * cumulative_average_precision_scores

mean_normalised_discounted_cumulative_gain(k)

Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff.

NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items.

Parameters:

    k (int): The rank cutoff for calculating NDCG. (required)

Returns:

    float: The mean NDCG score across all query results.

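A minimal sketch with hypothetical ranks, computing the mean NDCG at a cutoff of 10:

from pheval.analyse.rank_stats import RankStats

# hypothetical per-case ranks of the known entities
stats = RankStats(total=2, relevant_result_ranks=[[1], [3, 5]])
print(stats.mean_normalised_discounted_cumulative_gain(10))
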
Source code in src/pheval/analyse/rank_stats.py
def mean_normalised_discounted_cumulative_gain(self, k: int) -> float:
    """
    Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff.

    NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items.

    Args:
        k (int): The rank cutoff for calculating NDCG.

    Returns:
        float: The mean NDCG score across all query results.
    """
    ndcg_scores = []
    for result_ranks in self.relevant_result_ranks:
        result_ranks = [rank for rank in result_ranks if rank <= k]
        result_ranks = [3 if i in result_ranks else 0 for i in range(k)]
        ideal_ranking = sorted(result_ranks, reverse=True)
        ndcg_scores.append(ndcg_score(np.asarray([ideal_ranking]), np.asarray([result_ranks])))
    return np.mean(ndcg_scores)

mean_reciprocal_rank()

Calculate the Mean Reciprocal Rank (MRR) for the stored ranks.

The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases.

If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases.

Returns:

    float: The calculated Mean Reciprocal Rank.

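A short worked sketch (illustrative values): with three cases in total and matches ranked 1 and 4, one zero is padded in for the missing case.

from pheval.analyse.rank_stats import RankStats

stats = RankStats(total=3)
stats.add_rank(1)
stats.add_rank(4)
print(stats.mean_reciprocal_rank())  # mean of [1.0, 0.25, 0] ≈ 0.4167
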
Source code in src/pheval/analyse/rank_stats.py
def mean_reciprocal_rank(self) -> float:
    """
    Calculate the Mean Reciprocal Rank (MRR) for the stored ranks.

    The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks
    for the found cases.

    If the total number of cases differs from the number of found cases,
    this method extends the reciprocal ranks list with zeroes for missing cases.

    Returns:
        float: The calculated Mean Reciprocal Rank.
    """
    if len(self.reciprocal_ranks) != self.total:
        missing_cases = self.total - self.found
        self.reciprocal_ranks.extend([0] * missing_cases)
        return mean(self.reciprocal_ranks)
    return mean(self.reciprocal_ranks)

percentage_difference(percentage_value_1, percentage_value_2) staticmethod

Calculate the percentage difference between two percentage values.

Parameters:

    percentage_value_1 (float): The first percentage value. (required)
    percentage_value_2 (float): The second percentage value. (required)

Returns:

    float: The difference between the two percentage values.

Source code in src/pheval/analyse/rank_stats.py
@staticmethod
def percentage_difference(percentage_value_1: float, percentage_value_2: float) -> float:
    """
    Calculate the percentage difference between two percentage values.

    Args:
        percentage_value_1 (float): The first percentage value.
        percentage_value_2 (float): The second percentage value.

    Returns:
        float: The difference between the two percentage values.
    """
    return percentage_value_1 - percentage_value_2

percentage_found()

Calculate the percentage of matches found.

Returns:

    float: The percentage of matches found compared to the total count.

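A quick illustrative sketch; like the other percentage helpers, this divides a counter by the run's total and multiplies by 100.

from pheval.analyse.rank_stats import RankStats

stats = RankStats(found=9, total=12)
print(stats.percentage_found())  # 100 * 9 / 12 = 75.0
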
Source code in src/pheval/analyse/rank_stats.py
def percentage_found(self) -> float:
    """
    Calculate the percentage of matches found.

    Returns:
        float: The percentage of matches found compared to the total count.
    """
    return self.percentage_rank(self.found)

percentage_rank(value)

Calculate the percentage rank.

Parameters:

    value (int): The value for which the percentage rank needs to be calculated. (required)

Returns:

    float: The calculated percentage rank based on the provided value and the total count.

Source code in src/pheval/analyse/rank_stats.py
def percentage_rank(self, value: int) -> float:
    """
    Calculate the percentage rank.

    Args:
        value (int): The value for which the percentage rank needs to be calculated.

    Returns:
        float: The calculated percentage rank based on the provided value and the total count.
    """
    return 100 * value / self.total

percentage_top()

Calculate the percentage of top matches.

Returns:

    float: The percentage of top matches compared to the total count.

Source code in src/pheval/analyse/rank_stats.py
def percentage_top(self) -> float:
    """
    Calculate the percentage of top matches.

    Returns:
        float: The percentage of top matches compared to the total count.
    """
    return self.percentage_rank(self.top)

percentage_top10()

Calculate the percentage of matches within the top 10.

Returns:

    float: The percentage of matches within the top 10 compared to the total count.

Source code in src/pheval/analyse/rank_stats.py
def percentage_top10(self) -> float:
    """
    Calculate the percentage of matches within the top 10.

    Returns:
        float: The percentage of matches within the top 10 compared to the total count.
    """
    return self.percentage_rank(self.top10)

percentage_top3()

Calculate the percentage of matches within the top 3.

Returns:

    float: The percentage of matches within the top 3 compared to the total count.

Source code in src/pheval/analyse/rank_stats.py
def percentage_top3(self) -> float:
    """
    Calculate the percentage of matches within the top 3.

    Returns:
        float: The percentage of matches within the top 3 compared to the total count.
    """
    return self.percentage_rank(self.top3)

percentage_top5()

Calculate the percentage of matches within the top 5.

Returns:

    float: The percentage of matches within the top 5 compared to the total count.

Source code in src/pheval/analyse/rank_stats.py
def percentage_top5(self) -> float:
    """
    Calculate the percentage of matches within the top 5.

    Returns:
        float: The percentage of matches within the top 5 compared to the total count.
    """
    return self.percentage_rank(self.top5)

precision_at_k(k)

Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model.

Parameters:

    k (int): The number of top predictions to consider. (required)

Returns:

    float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions.

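A worked sketch with illustrative counts:

from pheval.analyse.rank_stats import RankStats

stats = RankStats(top=4, top10=7, total=10)
print(stats.precision_at_k(1))   # 4 / (10 * 1) = 0.4
print(stats.precision_at_k(10))  # 7 / (10 * 10) = 0.07
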
Source code in src/pheval/analyse/rank_stats.py
def precision_at_k(self, k: int) -> float:
    """
    Calculate the precision at k.
    Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions.
    It measures the accuracy of the top-k predictions made by a model.

    Args:
        k (int): The number of top predictions to consider.

    Returns:
        float: The precision at k, ranging from 0.0 to 1.0.
        A higher precision indicates a better performance in identifying relevant items in the top-k predictions.
    """
    k_attr = getattr(self, f"top{k}") if k > 1 else self.top
    return k_attr / (self.total * k)

return_mean_reciprocal_rank()

Retrieve or calculate the Mean Reciprocal Rank (MRR).

If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method.

Returns:

    float: The Mean Reciprocal Rank value.

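For example (illustrative values), a pre-computed MRR takes precedence over recomputation:

from pheval.analyse.rank_stats import RankStats

stats = RankStats(total=2, mrr=0.42)
print(stats.return_mean_reciprocal_rank())  # 0.42, the stored value is reused

stats = RankStats(total=2)
stats.add_rank(2)
print(stats.return_mean_reciprocal_rank())  # computed: mean of [0.5, 0] = 0.25
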
Source code in src/pheval/analyse/rank_stats.py
def return_mean_reciprocal_rank(self) -> float:
    """
    Retrieve or calculate the Mean Reciprocal Rank (MRR).

    If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value.
    Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method.

    Returns:
        float: The Mean Reciprocal Rank value.
    """
    if self.mrr is not None:
        return self.mrr
    else:
        return self.mean_reciprocal_rank()

RankStatsWriter

Class for writing the rank stats to a file.

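A minimal sketch of writing one summary row. The BinaryClassificationStats import path and constructor fields used here are assumptions (they are not documented on this page); check the pheval.analyse package for the actual module.

from pathlib import Path

# Assumed import path and dataclass-style constructor for BinaryClassificationStats
from pheval.analyse.binary_classification_stats import BinaryClassificationStats
from pheval.analyse.rank_stats import RankStats, RankStatsWriter

writer = RankStatsWriter(Path("rank_stats.tsv"))
stats = RankStats(top=4, top3=6, top5=7, top10=8, found=9, total=10)
stats.relevant_result_ranks = [[1]] * 10  # hypothetical per-case ranks
classification = BinaryClassificationStats(
    true_positives=8, false_positives=2, true_negatives=85, false_negatives=5
)
writer.write_row(Path("results_dir"), stats, classification)
writer.close()
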
Source code in src/pheval/analyse/rank_stats.py
class RankStatsWriter:
    """Class for writing the rank stats to a file."""

    def __init__(self, file: Path):
        """
        Initialise the RankStatsWriter class.

        Args:
            file (Path): Path to the file where rank stats will be written.
        """
        self.file = open(file, "w")
        self.writer = csv.writer(self.file, delimiter="\t")
        self.writer.writerow(
            [
                "results_directory_path",
                "top",
                "top3",
                "top5",
                "top10",
                "found",
                "total",
                "mean_reciprocal_rank",
                "percentage_top",
                "percentage_top3",
                "percentage_top5",
                "percentage_top10",
                "percentage_found",
                "precision@1",
                "precision@3",
                "precision@5",
                "precision@10",
                "MAP@1",
                "MAP@3",
                "MAP@5",
                "MAP@10",
                "f_beta_score@1",
                "f_beta_score@3",
                "f_beta_score@5",
                "f_beta_score@10",
                "NDCG@3",
                "NDCG@5",
                "NDCG@10",
                "true_positives",
                "false_positives",
                "true_negatives",
                "false_negatives",
                "sensitivity",
                "specificity",
                "precision",
                "negative_predictive_value",
                "false_positive_rate",
                "false_discovery_rate",
                "false_negative_rate",
                "accuracy",
                "f1_score",
                "matthews_correlation_coefficient",
            ]
        )

    def write_row(
        self,
        directory: Path,
        rank_stats: RankStats,
        binary_classification: BinaryClassificationStats,
    ) -> None:
        """
        Write summary rank statistics row for a run to the file.

        Args:
            directory (Path): Path to the results directory corresponding to the run
            rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run
            binary_classification (BinaryClassificationStats): Binary classification statistics corresponding to the run

        Raises:
            IOError: If there is an error writing to the file.
        """
        try:
            self.writer.writerow(
                [
                    directory,
                    rank_stats.top,
                    rank_stats.top3,
                    rank_stats.top5,
                    rank_stats.top10,
                    rank_stats.found,
                    rank_stats.total,
                    rank_stats.mean_reciprocal_rank(),
                    rank_stats.percentage_top(),
                    rank_stats.percentage_top3(),
                    rank_stats.percentage_top5(),
                    rank_stats.percentage_top10(),
                    rank_stats.percentage_found(),
                    rank_stats.precision_at_k(1),
                    rank_stats.precision_at_k(3),
                    rank_stats.precision_at_k(5),
                    rank_stats.precision_at_k(10),
                    rank_stats.mean_average_precision_at_k(1),
                    rank_stats.mean_average_precision_at_k(3),
                    rank_stats.mean_average_precision_at_k(5),
                    rank_stats.mean_average_precision_at_k(10),
                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1),
                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top3(), 3),
                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top5(), 5),
                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top10(), 10),
                    rank_stats.mean_normalised_discounted_cumulative_gain(3),
                    rank_stats.mean_normalised_discounted_cumulative_gain(5),
                    rank_stats.mean_normalised_discounted_cumulative_gain(10),
                    binary_classification.true_positives,
                    binary_classification.false_positives,
                    binary_classification.true_negatives,
                    binary_classification.false_negatives,
                    binary_classification.sensitivity(),
                    binary_classification.specificity(),
                    binary_classification.precision(),
                    binary_classification.negative_predictive_value(),
                    binary_classification.false_positive_rate(),
                    binary_classification.false_discovery_rate(),
                    binary_classification.false_negative_rate(),
                    binary_classification.accuracy(),
                    binary_classification.f1_score(),
                    binary_classification.matthews_correlation_coefficient(),
                ]
            )
        except IOError:
            print("Error writing ", self.file)

    def close(self) -> None:
        """
        Close the file used for writing rank statistics.

        Raises:
            IOError: If there's an error while closing the file.
        """
        try:
            self.file.close()
        except IOError:
            print("Error closing ", self.file)

__init__(file)

Initialise the RankStatsWriter class.

Parameters:

    file (Path): Path to the file where rank stats will be written. (required)

Source code in src/pheval/analyse/rank_stats.py
def __init__(self, file: Path):
    """
    Initialise the RankStatsWriter class.

    Args:
        file (Path): Path to the file where rank stats will be written.
    """
    self.file = open(file, "w")
    self.writer = csv.writer(self.file, delimiter="\t")
    self.writer.writerow(
        [
            "results_directory_path",
            "top",
            "top3",
            "top5",
            "top10",
            "found",
            "total",
            "mean_reciprocal_rank",
            "percentage_top",
            "percentage_top3",
            "percentage_top5",
            "percentage_top10",
            "percentage_found",
            "precision@1",
            "precision@3",
            "precision@5",
            "precision@10",
            "MAP@1",
            "MAP@3",
            "MAP@5",
            "MAP@10",
            "f_beta_score@1",
            "f_beta_score@3",
            "f_beta_score@5",
            "f_beta_score@10",
            "NDCG@3",
            "NDCG@5",
            "NDCG@10",
            "true_positives",
            "false_positives",
            "true_negatives",
            "false_negatives",
            "sensitivity",
            "specificity",
            "precision",
            "negative_predictive_value",
            "false_positive_rate",
            "false_discovery_rate",
            "false_negative_rate",
            "accuracy",
            "f1_score",
            "matthews_correlation_coefficient",
        ]
    )

close()

Close the file used for writing rank statistics.

Raises:

    IOError: If there's an error while closing the file.

Source code in src/pheval/analyse/rank_stats.py
def close(self) -> None:
    """
    Close the file used for writing rank statistics.

    Raises:
        IOError: If there's an error while closing the file.
    """
    try:
        self.file.close()
    except IOError:
        print("Error closing ", self.file)

write_row(directory, rank_stats, binary_classification)

Write summary rank statistics row for a run to the file.

Parameters:

    directory (Path): Path to the results directory corresponding to the run. (required)
    rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run. (required)
    binary_classification (BinaryClassificationStats): Binary classification statistics corresponding to the run. (required)

Raises:

    IOError: If there is an error writing to the file.

Source code in src/pheval/analyse/rank_stats.py
def write_row(
    self,
    directory: Path,
    rank_stats: RankStats,
    binary_classification: BinaryClassificationStats,
) -> None:
    """
    Write summary rank statistics row for a run to the file.

    Args:
        directory (Path): Path to the results directory corresponding to the run
        rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run
        binary_classification (BinaryClassificationStats): Binary classification statistics corresponding to the run

    Raises:
        IOError: If there is an error writing to the file.
    """
    try:
        self.writer.writerow(
            [
                directory,
                rank_stats.top,
                rank_stats.top3,
                rank_stats.top5,
                rank_stats.top10,
                rank_stats.found,
                rank_stats.total,
                rank_stats.mean_reciprocal_rank(),
                rank_stats.percentage_top(),
                rank_stats.percentage_top3(),
                rank_stats.percentage_top5(),
                rank_stats.percentage_top10(),
                rank_stats.percentage_found(),
                rank_stats.precision_at_k(1),
                rank_stats.precision_at_k(3),
                rank_stats.precision_at_k(5),
                rank_stats.precision_at_k(10),
                rank_stats.mean_average_precision_at_k(1),
                rank_stats.mean_average_precision_at_k(3),
                rank_stats.mean_average_precision_at_k(5),
                rank_stats.mean_average_precision_at_k(10),
                rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1),
                rank_stats.f_beta_score_at_k(rank_stats.percentage_top3(), 3),
                rank_stats.f_beta_score_at_k(rank_stats.percentage_top5(), 5),
                rank_stats.f_beta_score_at_k(rank_stats.percentage_top10(), 10),
                rank_stats.mean_normalised_discounted_cumulative_gain(3),
                rank_stats.mean_normalised_discounted_cumulative_gain(5),
                rank_stats.mean_normalised_discounted_cumulative_gain(10),
                binary_classification.true_positives,
                binary_classification.false_positives,
                binary_classification.true_negatives,
                binary_classification.false_negatives,
                binary_classification.sensitivity(),
                binary_classification.specificity(),
                binary_classification.precision(),
                binary_classification.negative_predictive_value(),
                binary_classification.false_positive_rate(),
                binary_classification.false_discovery_rate(),
                binary_classification.false_negative_rate(),
                binary_classification.accuracy(),
                binary_classification.f1_score(),
                binary_classification.matthews_correlation_coefficient(),
            ]
        )
    except IOError:
        print("Error writing ", self.file)