Skip to content

Generate summary outputs

RankComparisonGenerator

Class for writing the run comparison of rank assignment for prioritisation.

Source code in src/pheval/analyse/generate_summary_outputs.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class RankComparisonGenerator:
    """Tabulate run comparison data and write it out as tab-separated files."""

    def __init__(self, run_comparison: defaultdict):
        """
        Store the run comparison data that the other methods tabulate.

        Args:
            run_comparison (defaultdict): Nested dictionary of run comparison data. Each outer key
                becomes a row label and each inner key a column of the generated table.
        """
        self.run_comparison = run_comparison

    def _generate_dataframe(self) -> pd.DataFrame:
        """
        Build a table from the stored run comparison data.

        Returns:
            pd.DataFrame: One row per outer key of `run_comparison`, one column per inner key.
        """
        return pd.DataFrame.from_dict(self.run_comparison, orient="index")

    def _calculate_rank_difference(self) -> pd.DataFrame:
        """
        Add a `rank_change` column comparing the two runs held in the table.

        The runs are read by position: the third column (index 2) is the baseline run and the
        fourth column (index 3) is the run compared against it. `rank_change` is the baseline
        rank minus the compared rank, except that it is "GAINED" when only the baseline rank is 0
        and "LOST" when only the compared rank is 0.
        NOTE(review): a rank of 0 presumably means the entity was not found in that run - confirm.

        Returns:
            pd.DataFrame: The comparison table with the additional `rank_change` column.
        """

        def _restore_int(value):
            # np.where with string labels yields strings throughout; turn the numeric ones back
            # into integers and leave the "GAINED"/"LOST" labels untouched.
            if str(value).lstrip("-").isdigit():
                return int(value)
            return value

        table = self._generate_dataframe()
        baseline_rank = table.iloc[:, 2]
        compared_rank = table.iloc[:, 3]

        numeric_change = baseline_rank - compared_rank
        is_gained = (baseline_rank == 0) & (compared_rank != 0)
        is_lost = (compared_rank == 0) & (baseline_rank != 0)

        lost_or_numeric = np.where(is_lost, "LOST", numeric_change)
        table["rank_change"] = np.where(is_gained, "GAINED", lost_or_numeric)
        table["rank_change"] = table["rank_change"].apply(_restore_int)
        return table

    def generate_output(self, prefix: str, suffix: str) -> None:
        """
        Write the run comparison table to a tab-separated file.

        Args:
            prefix (str): Leading part of the output file name.
            suffix (str): Trailing part of the output file name, appended directly to `prefix`.
        """
        table = self._generate_dataframe()
        table.to_csv(prefix + suffix, sep="\t")

    def generate_comparison_output(self, prefix: str, suffix: str) -> None:
        """
        Write the run comparison table, including the `rank_change` column, to a tab-separated file.

        Args:
            prefix (str): Leading part of the output file name.
            suffix (str): Trailing part of the output file name, appended directly to `prefix`.
        """
        table = self._calculate_rank_difference()
        table.to_csv(prefix + suffix, sep="\t")

__init__(run_comparison)

Initialise the RankComparisonGenerator class.

Parameters:

Name Type Description Default
run_comparison defaultdict

A nested dictionary containing the run comparison data.

required
Source code in src/pheval/analyse/generate_summary_outputs.py
18
19
20
21
22
23
24
25
def __init__(self, run_comparison: defaultdict):
    """
    Initialise the RankComparisonGenerator with the data it will tabulate.

    Args:
        run_comparison (defaultdict): A nested dictionary containing the run comparison data.
            It is stored as-is (no copy is made) on the instance as `run_comparison`.
    """
    self.run_comparison = run_comparison

generate_comparison_output(prefix, suffix)

Generate output file with calculated rank differences.

Parameters:

Name Type Description Default
prefix str

Prefix for the output file name.

required
suffix str

Suffix for the output file name.

required
Source code in src/pheval/analyse/generate_summary_outputs.py
69
70
71
72
73
74
75
76
77
def generate_comparison_output(self, prefix: str, suffix: str) -> None:
    """
    Write the rank-difference table to a tab-separated file.

    The table comes from `_calculate_rank_difference` and is written to the path formed by
    concatenating `prefix` and `suffix`.

    Args:
        prefix (str): Leading part of the output file name.
        suffix (str): Trailing part of the output file name, appended directly to `prefix`.
    """
    rank_difference_table = self._calculate_rank_difference()
    output_path = prefix + suffix
    rank_difference_table.to_csv(output_path, sep="\t")

generate_output(prefix, suffix)

Generate output file from the run comparison data.

Parameters:

Name Type Description Default
prefix str

Prefix for the output file name.

required
suffix str

Suffix for the output file name.

required
Source code in src/pheval/analyse/generate_summary_outputs.py
59
60
61
62
63
64
65
66
67
def generate_output(self, prefix: str, suffix: str) -> None:
    """
    Write the run comparison table to a tab-separated file.

    The table comes from `_generate_dataframe` and is written to the path formed by
    concatenating `prefix` and `suffix`.

    Args:
        prefix (str): Leading part of the output file name.
        suffix (str): Trailing part of the output file name, appended directly to `prefix`.
    """
    run_comparison_table = self._generate_dataframe()
    output_path = prefix + suffix
    run_comparison_table.to_csv(output_path, sep="\t")

generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator)

Generate prioritisation outputs for benchmarking multiple runs.

This function generates comparison outputs for benchmarking multiple runs. It compares the results between pairs of BenchmarkRunResults instances in benchmarking_results and generates rank comparison outputs using RankComparisonGenerator for each pair.

Parameters:

Name Type Description Default
benchmarking_results List[BenchmarkRunResults]

A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs.

required
plot_type str

The type of plot to be generated.

required
benchmark_generator BenchmarkRunOutputGenerator

Object containing benchmarking output generation details.

required
Source code in src/pheval/analyse/generate_summary_outputs.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def generate_benchmark_comparison_output(
    benchmarking_results: List[BenchmarkRunResults],
    plot_type: str,
    benchmark_generator: BenchmarkRunOutputGenerator,
) -> None:
    """
    Write pairwise rank comparison files for several benchmarking runs, then generate plots.

    Every unordered pair of runs in `benchmarking_results` is compared: the ranks of both runs are
    deep-copied, merged with `merge_results`, and written through
    `RankComparisonGenerator.generate_comparison_output`. The file name of each comparison is built
    from the parent directory name and directory name of both runs' `results_dir`, joined by
    `_vs_`. After all pairs are written, `generate_plots` is called once for the full list of runs.

    Args:
        benchmarking_results (List[BenchmarkRunResults]): Benchmarking results of the runs to compare.
        plot_type (str): The type of plot to be generated; forwarded to `generate_plots`.
        benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output
            generation details; its `prioritisation_type_file_prefix` is used in the file suffix.
    """

    def _run_label(run_result) -> str:
        # "<parent directory name>_<results directory name>" identifies one run in the file name.
        return f"{run_result.results_dir.parents[0].name}_{run_result.results_dir.name}"

    file_suffix = f"-{benchmark_generator.prioritisation_type_file_prefix}{RANK_COMPARISON_FILE_SUFFIX}"
    for result1, result2 in itertools.combinations(benchmarking_results, 2):
        # merge_results mutates its first argument, so work on copies of each run's ranks.
        combined_ranks = merge_results(deepcopy(result1.ranks), deepcopy(result2.ranks))
        comparison_writer = RankComparisonGenerator(combined_ranks)
        file_prefix = f"{_run_label(result1)}_vs_{_run_label(result2)}"
        comparison_writer.generate_comparison_output(file_prefix, file_suffix)

    generate_plots(benchmarking_results, benchmark_generator, plot_type)

generate_benchmark_output(benchmarking_results, plot_type, benchmark_generator)

Generate prioritisation outputs for a single benchmarking run.

Parameters:

Name Type Description Default
benchmarking_results BenchmarkRunResults

Results of a benchmarking run.

required
plot_type str

Type of plot to generate.

required
benchmark_generator BenchmarkRunOutputGenerator

Object containing benchmarking output generation details.

required
Source code in src/pheval/analyse/generate_summary_outputs.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def generate_benchmark_output(
    benchmarking_results: BenchmarkRunResults,
    plot_type: str,
    benchmark_generator: BenchmarkRunOutputGenerator,
) -> None:
    """
    Write the rank table for one benchmarking run, then generate its plots.

    The run's ranks are written through `RankComparisonGenerator.generate_output` to a file named
    after the run's results directory, followed by the prioritisation type prefix and the rank
    comparison file suffix. `generate_plots` is then called with the run wrapped in a list.

    Args:
        benchmarking_results (BenchmarkRunResults): Results of a benchmarking run.
        plot_type (str): Type of plot to generate; forwarded to `generate_plots`.
        benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output
            generation details; its `prioritisation_type_file_prefix` is used in the file suffix.
    """
    run_ranks = benchmarking_results.ranks
    run_name = benchmarking_results.results_dir.name
    rank_writer = RankComparisonGenerator(run_ranks)
    file_suffix = f"-{benchmark_generator.prioritisation_type_file_prefix}{RANK_COMPARISON_FILE_SUFFIX}"
    rank_writer.generate_output(f"{run_name}", file_suffix)
    generate_plots([benchmarking_results], benchmark_generator, plot_type)

merge_results(result1, result2)

Merge two nested dictionaries of results, combining the entries whose keys they have in common.

This function merges two dictionaries, result1 and result2, containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key is present in both dictionaries and its value in result1 is not a dictionary, the value from result2 replaces it. If a key exists in result2 but not in result1, it will be added to result1. The merge is performed in place: result1 is modified and returned.

Parameters:

Name Type Description Default
result1 dict

The first dictionary to be merged.

required
result2 dict

The second dictionary to be merged.

required

Returns:

Name Type Description
defaultdict defaultdict

The merged dictionary containing the combined contents of result1 and result2.

Source code in src/pheval/analyse/generate_summary_outputs.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def merge_results(result1: dict, result2: dict) -> defaultdict:
    """
    Merge two nested dictionaries containing results, combining entries that share keys.

    The merge is performed in place on `result1`, which is also the returned object:

    - If a key is present in both dictionaries and both values are dictionaries (including
      subclasses such as `defaultdict`), their contents are merged recursively.
    - If a key is present in both and the value in `result1` is a dictionary but the value in
      `result2` is not, the value in `result1` is left untouched.
    - If a key is present in both and the value in `result1` is not a dictionary, it is replaced
      by the value from `result2`.
    - If a key exists only in `result2`, it is added to `result1`.

    Values taken from `result2` are inserted by reference, not copied; pass copies if the inputs
    must stay independent of the merged result.

    Args:
        result1 (dict): The first dictionary to be merged; modified in place.
        result2 (dict): The second dictionary to be merged; not modified by this function.

    Returns:
        defaultdict: `result1` itself (whatever mapping type was passed in), now containing the
            combined contents of `result1` and `result2`.
    """
    for key, val in result1.items():
        if key not in result2:
            continue
        other_val = result2[key]
        if isinstance(val, dict):
            # Only recurse when both sides are mappings; recursing into a non-dict would raise.
            if isinstance(other_val, dict):
                merge_results(val, other_val)
        else:
            # Re-assigning an existing key does not resize the dict, so it is safe mid-iteration.
            result1[key] = other_val

    for key, val in result2.items():
        if key not in result1:
            result1[key] = val
    return result1