Skip to content

Parse benchmark summary

parse_benchmark_result_summary(benchmarking_df)

Parse the summary benchmark DataFrame into a list of BenchmarkRunResults.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `benchmarking_df` | `pd.DataFrame` | Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. | required |

Returns:

| Type | Description |
| --- | --- |
| `List[BenchmarkRunResults]` | A list of BenchmarkRunResults instances generated from the DataFrame. |

Source code in src/pheval/analyse/parse_benchmark_summary.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def parse_benchmark_result_summary(benchmarking_df: pd.DataFrame) -> List[BenchmarkRunResults]:
    """
    Convert every row of a summary benchmark DataFrame into a BenchmarkRunResults.

    Each row yields one BenchmarkRunResults whose rank statistics are read from the
    row's columns. The per-result ``ranks`` mapping is left empty and the binary
    classification statistics are default-constructed, since neither is read from
    the DataFrame here.

    Args:
        benchmarking_df (pd.DataFrame): Summary benchmark DataFrame. The columns read are
                                        'results_directory_path', 'top', 'top3', 'top5',
                                        'top10', 'found', 'total' and 'mean_reciprocal_rank'.

    Returns:
        List[BenchmarkRunResults]: One BenchmarkRunResults per DataFrame row, in row order.
    """
    return [
        BenchmarkRunResults(
            rank_stats=RankStats(
                top=summary_row["top"],
                top3=summary_row["top3"],
                top5=summary_row["top5"],
                top10=summary_row["top10"],
                found=summary_row["found"],
                total=summary_row["total"],
                mrr=summary_row["mean_reciprocal_rank"],
            ),
            # The summary frame carries only aggregate statistics, so no ranks are filled in.
            ranks={},
            # The results directory path is used as the benchmark's name.
            benchmark_name=summary_row["results_directory_path"],
            binary_classification_stats=BinaryClassificationStats(),
        )
        for _, summary_row in benchmarking_df.iterrows()
    ]

read_benchmark_tsv_result_summary(benchmarking_tsv)

Read the summary benchmark TSV output generated from the benchmark-comparison command.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `benchmarking_tsv` | `Path` | Path to the summary benchmark TSV output file. | required |

Returns:

| Type | Description |
| --- | --- |
| `pd.DataFrame` | A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. |

Source code in src/pheval/analyse/parse_benchmark_summary.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def read_benchmark_tsv_result_summary(benchmarking_tsv: Path) -> pd.DataFrame:
    """
    Load the summary benchmark TSV written by the benchmark-comparison command.

    Only the summary columns listed below are loaded; any other columns present
    in the file are ignored.

    Args:
        benchmarking_tsv (Path): Path to the summary benchmark TSV output file.

    Returns:
        pd.DataFrame: A pandas DataFrame restricted to the columns
                      'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found',
                      'total', 'mean_reciprocal_rank'.
    """
    # Columns to keep from the TSV.
    summary_columns = [
        "results_directory_path",
        "top",
        "top3",
        "top5",
        "top10",
        "found",
        "total",
        "mean_reciprocal_rank",
    ]
    summary_df = pd.read_csv(benchmarking_tsv, sep="\t", usecols=summary_columns)
    return summary_df