Skip to content

Create noisy phenopackets

HpoRandomiser

Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).

Source code in src/pheval/prepare/create_noisy_phenopackets.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
class HpoRandomiser:
    """
    Randomise phenopacket phenotypic features using the Human Phenotype Ontology (HPO).

    A noisy profile is assembled from three parts: a sample of the patient's own
    terms, ontology parents of some of the remaining terms, and terms drawn at
    random from ``phenotypic_abnormalities``. The size of each part is derived
    from ``scramble_factor``.
    """

    def __init__(self, hpo_ontology: ProntoImplementation, scramble_factor: float):
        """
        Initialise the HpoRandomiser.

        Args:
            hpo_ontology (ProntoImplementation): The instance of the HPO ontology.
            scramble_factor (float): A factor for scrambling phenotypic features.
        """
        self.hpo_ontology = hpo_ontology
        # Pool of term IDs that create_random_hpo_terms() samples from.
        self.phenotypic_abnormalities = set(hpo_ontology.roots(predicates=["HP:0000118"]))
        self.scramble_factor = scramble_factor

    def scramble_factor_proportions(self, phenotypic_features: List[PhenotypicFeature]) -> int:
        """
        Calculate how many HPO terms should be scrambled, based on the scramble factor.

        Args:
            phenotypic_features (List[PhenotypicFeature]): List of phenotypic features.

        Returns:
            int: The calculated number of phenotypic features to be scrambled. A profile
            with exactly one feature always yields 1, regardless of the scramble factor.
        """
        if len(phenotypic_features) == 1:
            return 1
        return int(round(len(phenotypic_features) * self.scramble_factor, 0))

    def retrieve_hpo_term(self, hpo_id: str) -> PhenotypicFeature:
        """
        Retrieve an HPO term based on the provided HPO ID.

        Args:
            hpo_id (str): The HPO ID of the term to retrieve.

        Returns:
            PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term.
        """
        rels = self.hpo_ontology.entity_alias_map(hpo_id)
        # The label is built from the values stored under the first key of the alias map.
        hpo_term = "".join(list(rels.values())[0])
        return PhenotypicFeature(type=OntologyClass(id=hpo_id, label=hpo_term))

    @staticmethod
    def retain_real_patient_terms(
        phenotypic_features: List[PhenotypicFeature],
        number_of_scrambled_terms: int,
    ) -> List[PhenotypicFeature]:
        """
        Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms.

        Args:
            phenotypic_features (List[PhenotypicFeature]): List of phenotypic features.
            number_of_scrambled_terms (int): The count of scrambled HPO terms.

        Returns:
            List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms.
            An empty input yields an empty list; a single-feature input keeps that feature.
        """
        total = len(phenotypic_features)
        if total == 0:
            # Nothing to retain; sampling one item from an empty list would raise ValueError.
            return []
        if total == 1:
            number_of_real_id = 1
        else:
            # Clamp so an over- or under-sized scramble count cannot ask for an impossible sample.
            number_of_real_id = min(total, max(0, total - number_of_scrambled_terms))
        return random.sample(phenotypic_features, number_of_real_id)

    def convert_patient_terms_to_parent(
        self,
        phenotypic_features: List[PhenotypicFeature],
        retained_phenotypic_features: List[PhenotypicFeature],
        number_of_scrambled_terms: int,
    ) -> List[PhenotypicFeature]:
        """
        Convert a subset of patient HPO terms to their respective parent terms.

        Args:
            phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features.
            retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features.
            number_of_scrambled_terms (int): The count of scrambled HPO terms.

        Returns:
            List[PhenotypicFeature]: A list of HPO terms converted to their parent terms.

        Note:
            Only terms that are not among the retained features are candidates. At most
            ``number_of_scrambled_terms`` of them are converted; when fewer candidates
            remain (for example because the profile contains duplicate features), all
            remaining candidates are converted. A term for which the ontology reports
            no parents is returned unchanged.
        """
        remaining_hpo = [i for i in phenotypic_features if i not in retained_phenotypic_features]
        # Never request more items than are available; this also covers the empty case.
        sample_size = max(0, min(number_of_scrambled_terms, len(remaining_hpo)))
        hpo_terms_to_be_changed = random.sample(remaining_hpo, sample_size)
        parent_terms = []
        for term in hpo_terms_to_be_changed:
            if self.hpo_ontology.label(term.type.id).startswith("obsolete"):
                # NOTE(review): assumes the first metadata value of an obsolete term starts
                # with the ID of its replacement term - confirm against the ontology metadata.
                obsolete_term = self.hpo_ontology.entity_metadata_map(term.type.id)
                updated_term = list(obsolete_term.values())[0][0]
                parents = self.hpo_ontology.hierarchical_parents(updated_term)
            else:
                parents = self.hpo_ontology.hierarchical_parents(term.type.id)
            if not parents:
                parent_terms.append(term)
            else:
                parent_terms.append(self.retrieve_hpo_term(random.choice(parents)))
        return parent_terms

    def create_random_hpo_terms(self, number_of_scrambled_terms: int) -> List[PhenotypicFeature]:
        """
        Generate a list of random HPO terms.

        Args:
            number_of_scrambled_terms (int): The count of random HPO terms to be generated.

        Returns:
            List[PhenotypicFeature]: A list of randomly selected HPO terms.
        """
        # Sort first so that sampling is reproducible for a given random seed.
        random_ids = random.sample(sorted(self.phenotypic_abnormalities), number_of_scrambled_terms)
        return [self.retrieve_hpo_term(random_id) for random_id in random_ids]

    def randomise_hpo_terms(
        self,
        phenotypic_features: List[PhenotypicFeature],
    ) -> List[PhenotypicFeature]:
        """
        Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms.

        Args:
            phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised.

        Returns:
            List[PhenotypicFeature]: A list of randomised HPO terms.

        Note:
            This method randomises the provided phenotypic features by incorporating three types of HPO terms:
            1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor.
            2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms.
            3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor.

            The method determines the count of terms for each category and combines them to form a final list
            of randomised HPO terms to be used in the phenotypic features.
        """
        number_of_scrambled_terms = self.scramble_factor_proportions(phenotypic_features)
        retained_patient_terms = self.retain_real_patient_terms(
            phenotypic_features, number_of_scrambled_terms
        )
        return (
            retained_patient_terms
            + self.convert_patient_terms_to_parent(
                phenotypic_features, retained_patient_terms, number_of_scrambled_terms
            )
            + self.create_random_hpo_terms(number_of_scrambled_terms)
        )

    def add_noise_to_phenotypic_profile(
        self,
        phenopacket: Union[Phenopacket, Family],
    ) -> Union[Phenopacket, Family]:
        """
        Randomise the phenotypic profile of a Phenopacket or Family.

        Args:
            phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised.

        Returns:
            Union[Phenopacket, Family]: The randomised Phenopacket or Family.
        """
        phenotypic_features = PhenopacketUtil(phenopacket).observed_phenotypic_features()
        random_phenotypes = self.randomise_hpo_terms(phenotypic_features)
        return PhenopacketRebuilder(phenopacket).add_randomised_hpo(random_phenotypes)

    def create_scrambled_phenopacket(
        self,
        output_dir: Path,
        phenopacket_path: Path,
    ) -> None:
        """
        Create a scrambled version of a Phenopacket.

        The result is written to ``output_dir`` under the same file name as the input.

        Args:
            output_dir (Path): The directory to store the output scrambled Phenopacket.
            phenopacket_path (Path): The path to the original Phenopacket file.
        """
        phenopacket = phenopacket_reader(phenopacket_path)
        created_noisy_phenopacket = self.add_noise_to_phenotypic_profile(phenopacket)
        write_phenopacket(
            created_noisy_phenopacket,
            output_dir.joinpath(phenopacket_path.name),
        )

    def create_scrambled_phenopackets(
        self,
        output_dir: Path,
        phenopacket_dir: Path,
    ) -> None:
        """
        Create scrambled versions of Phenopackets within a directory.

        Only files with a ``.json`` suffix are processed.

        Args:
            output_dir (Path): The directory to store the output scrambled Phenopackets.
            phenopacket_dir (Path): The directory containing the original Phenopacket files.
        """
        for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
            logger.info(f"Scrambling {phenopacket_path.name}.")
            # Reuse the single-file routine so both entry points stay in sync.
            self.create_scrambled_phenopacket(output_dir, phenopacket_path)

__init__(hpo_ontology, scramble_factor)

Initialise the HpoRandomiser.

Parameters:

Name Type Description Default
hpo_ontology ProntoImplementation

The instance of the HPO ontology.

required
scramble_factor float

A factor for scrambling phenotypic features.

required
Source code in src/pheval/prepare/create_noisy_phenopackets.py
43
44
45
46
47
48
49
50
51
52
53
def __init__(self, hpo_ontology: ProntoImplementation, scramble_factor: float):
    """
    Set up the randomiser with an ontology handle and a scramble factor.

    Args:
        hpo_ontology (ProntoImplementation): The HPO ontology instance used for term lookups.
        scramble_factor (float): Factor controlling how many phenotypic features get scrambled.
    """
    self.hpo_ontology = hpo_ontology
    self.scramble_factor = scramble_factor
    # Deduplicate whatever the ontology returns so it can serve as a sampling pool.
    candidate_terms = hpo_ontology.roots(predicates=["HP:0000118"])
    self.phenotypic_abnormalities = set(candidate_terms)

add_noise_to_phenotypic_profile(phenopacket)

Randomise the phenotypic profile of a Phenopacket or Family.

Parameters:

Name Type Description Default
phenopacket Union[Phenopacket, Family]

The Phenopacket or Family to be randomised.

required

Returns:

Type Description
Union[Phenopacket, Family]

Union[Phenopacket, Family]: The randomised Phenopacket or Family.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def add_noise_to_phenotypic_profile(
    self,
    phenopacket: Union[Phenopacket, Family],
) -> Union[Phenopacket, Family]:
    """
    Replace the observed phenotypic profile of a Phenopacket or Family with a noisy one.

    Args:
        phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to add noise to.

    Returns:
        Union[Phenopacket, Family]: The Phenopacket or Family rebuilt with randomised HPO terms.
    """
    # Randomise the observed features first, then hand them to the rebuilder.
    noisy_features = self.randomise_hpo_terms(
        PhenopacketUtil(phenopacket).observed_phenotypic_features()
    )
    return PhenopacketRebuilder(phenopacket).add_randomised_hpo(noisy_features)

convert_patient_terms_to_parent(phenotypic_features, retained_phenotypic_features, number_of_scrambled_terms)

Convert a subset of patient HPO terms to their respective parent terms.

Parameters:

Name Type Description Default
phenotypic_features List[PhenotypicFeature]

List of all phenotypic features.

required
retained_phenotypic_features List[PhenotypicFeature]

List of retained non-scrambled phenotypic features.

required
number_of_scrambled_terms int

The count of scrambled HPO terms.

required

Returns:

Type Description
List[PhenotypicFeature]

List[PhenotypicFeature]: A list of HPO terms converted to their parent terms.

Note

This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def convert_patient_terms_to_parent(
    self,
    phenotypic_features: List[PhenotypicFeature],
    retained_phenotypic_features: List[PhenotypicFeature],
    number_of_scrambled_terms: int,
) -> List[PhenotypicFeature]:
    """
    Convert a subset of patient HPO terms to their respective parent terms.

    Args:
        phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features.
        retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features.
        number_of_scrambled_terms (int): The count of scrambled HPO terms.

    Returns:
        List[PhenotypicFeature]: A list of HPO terms converted to their parent terms.

    Note:
        Only terms that are not among the retained features are candidates. At most
        ``number_of_scrambled_terms`` of them are converted; when fewer candidates
        remain (for example because the profile contains duplicate features), all
        remaining candidates are converted. A term for which the ontology reports
        no parents is returned unchanged.
    """
    remaining_hpo = [i for i in phenotypic_features if i not in retained_phenotypic_features]
    # Never request more items than are available; this also covers the empty case
    # and avoids random.sample raising ValueError.
    sample_size = max(0, min(number_of_scrambled_terms, len(remaining_hpo)))
    hpo_terms_to_be_changed = random.sample(remaining_hpo, sample_size)
    parent_terms = []
    for term in hpo_terms_to_be_changed:
        if self.hpo_ontology.label(term.type.id).startswith("obsolete"):
            # NOTE(review): assumes the first metadata value of an obsolete term starts
            # with the ID of its replacement term - confirm against the ontology metadata.
            obsolete_term = self.hpo_ontology.entity_metadata_map(term.type.id)
            updated_term = list(obsolete_term.values())[0][0]
            parents = self.hpo_ontology.hierarchical_parents(updated_term)
        else:
            parents = self.hpo_ontology.hierarchical_parents(term.type.id)
        if not parents:
            parent_terms.append(term)
        else:
            parent_terms.append(self.retrieve_hpo_term(random.choice(parents)))
    return parent_terms

create_random_hpo_terms(number_of_scrambled_terms)

Generate a list of random HPO terms.

Parameters:

Name Type Description Default
number_of_scrambled_terms int

The count of random HPO terms to be generated.

required

Returns:

Type Description
List[PhenotypicFeature]

List[PhenotypicFeature]: A list of randomly selected HPO terms.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def create_random_hpo_terms(self, number_of_scrambled_terms: int) -> List[PhenotypicFeature]:
    """
    Draw random HPO terms from the pool of phenotypic abnormalities.

    Args:
        number_of_scrambled_terms (int): How many random HPO terms to produce.

    Returns:
        List[PhenotypicFeature]: The randomly chosen HPO terms, one per sampled ID.
    """
    # Sampling happens on a sorted copy of the pool, without replacement.
    candidate_pool = sorted(self.phenotypic_abnormalities)
    sampled_terms = []
    for hpo_id in random.sample(candidate_pool, number_of_scrambled_terms):
        sampled_terms.append(self.retrieve_hpo_term(hpo_id))
    return sampled_terms

create_scrambled_phenopacket(output_dir, phenopacket_path)

Create a scrambled version of a Phenopacket.

Parameters:

Name Type Description Default
output_dir Path

The directory to store the output scrambled Phenopacket.

required
phenopacket_path Path

The path to the original Phenopacket file.

required
Source code in src/pheval/prepare/create_noisy_phenopackets.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def create_scrambled_phenopacket(
    self,
    output_dir: Path,
    phenopacket_path: Path,
) -> None:
    """
    Read one Phenopacket, add noise to it, and write the result to the output directory.

    The output file keeps the file name of the input.

    Args:
        output_dir (Path): Directory that receives the scrambled Phenopacket.
        phenopacket_path (Path): Path of the Phenopacket file to scramble.
    """
    source_packet = phenopacket_reader(phenopacket_path)
    noisy_packet = self.add_noise_to_phenotypic_profile(source_packet)
    destination = output_dir.joinpath(phenopacket_path.name)
    write_phenopacket(noisy_packet, destination)

create_scrambled_phenopackets(output_dir, phenopacket_dir)

Create scrambled versions of Phenopackets within a directory.

Parameters:

Name Type Description Default
output_dir Path

The directory to store the output scrambled Phenopackets.

required
phenopacket_dir Path

The directory containing the original Phenopacket files.

required
Source code in src/pheval/prepare/create_noisy_phenopackets.py
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def create_scrambled_phenopackets(
    self,
    output_dir: Path,
    phenopacket_dir: Path,
) -> None:
    """
    Scramble every ``.json`` Phenopacket found in a directory.

    Each result is written to the output directory under the input's file name.

    Args:
        output_dir (Path): Directory that receives the scrambled Phenopackets.
        phenopacket_dir (Path): Directory holding the original Phenopacket files.
    """
    for source_path in files_with_suffix(phenopacket_dir, ".json"):
        logger.info(f"Scrambling {source_path.name}.")
        noisy_packet = self.add_noise_to_phenotypic_profile(phenopacket_reader(source_path))
        write_phenopacket(noisy_packet, output_dir.joinpath(source_path.name))

randomise_hpo_terms(phenotypic_features)

Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms.

Parameters:

Name Type Description Default
phenotypic_features List[PhenotypicFeature]

List of phenotypic features to be randomised.

required

Returns:

Type Description
List[PhenotypicFeature]

List[PhenotypicFeature]: A list of randomised HPO terms.

Note

This method randomises the provided phenotypic features by incorporating three types of HPO terms:

1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor.
2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms.
3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor.

The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
def randomise_hpo_terms(
    self,
    phenotypic_features: List[PhenotypicFeature],
) -> List[PhenotypicFeature]:
    """
    Build a randomised profile from retained, parent-converted, and random HPO terms.

    Args:
        phenotypic_features (List[PhenotypicFeature]): The phenotypic features to randomise.

    Returns:
        List[PhenotypicFeature]: Retained terms, followed by parent-converted terms,
        followed by random terms.

    Note:
        The scramble count is computed once from the scramble factor and then used for
        all three parts:
        1. Retained Patient Terms: real patient terms kept unchanged.
        2. Converted to Parent Terms: non-retained terms swapped for a parent term.
        3. Random HPO Terms: newly drawn random terms.
    """
    scramble_count = self.scramble_factor_proportions(phenotypic_features)
    kept_terms = self.retain_real_patient_terms(phenotypic_features, scramble_count)
    parent_terms = self.convert_patient_terms_to_parent(
        phenotypic_features, kept_terms, scramble_count
    )
    random_terms = self.create_random_hpo_terms(scramble_count)
    return kept_terms + parent_terms + random_terms

retain_real_patient_terms(phenotypic_features, number_of_scrambled_terms) staticmethod

Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms.

Parameters:

Name Type Description Default
phenotypic_features List[PhenotypicFeature]

List of phenotypic features.

required
number_of_scrambled_terms int

The count of scrambled HPO terms.

required

Returns:

Type Description
List[PhenotypicFeature]

List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@staticmethod
def retain_real_patient_terms(
    phenotypic_features: List[PhenotypicFeature],
    number_of_scrambled_terms: int,
) -> List[PhenotypicFeature]:
    """
    Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms.

    Args:
        phenotypic_features (List[PhenotypicFeature]): List of phenotypic features.
        number_of_scrambled_terms (int): The count of scrambled HPO terms.

    Returns:
        List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms.
        An empty input yields an empty list; a single-feature input keeps that feature.
    """
    total = len(phenotypic_features)
    if total == 0:
        # Nothing to retain; sampling one item from an empty list would raise ValueError.
        return []
    if total == 1:
        number_of_real_id = 1
    else:
        # Clamp so an over- or under-sized scramble count cannot ask for an impossible sample.
        number_of_real_id = min(total, max(0, total - number_of_scrambled_terms))
    return random.sample(phenotypic_features, number_of_real_id)

retrieve_hpo_term(hpo_id)

Retrieve an HPO term based on the provided HPO ID.

Parameters:

Name Type Description Default
hpo_id str

The HPO ID of the term to retrieve.

required

Returns:

Name Type Description
PhenotypicFeature PhenotypicFeature

The PhenotypicFeature object representing the retrieved HPO term.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
70
71
72
73
74
75
76
77
78
79
80
81
82
def retrieve_hpo_term(self, hpo_id: str) -> PhenotypicFeature:
    """
    Build a PhenotypicFeature for the given HPO ID.

    Args:
        hpo_id (str): The HPO ID to look up.

    Returns:
        PhenotypicFeature: A feature whose type carries the ID and the label found for it.
    """
    alias_map = self.hpo_ontology.entity_alias_map(hpo_id)
    # The label is joined from the values stored under the first key of the alias map.
    first_key = list(alias_map.keys())[0]
    label = "".join(alias_map[first_key])
    ontology_class = OntologyClass(id=hpo_id, label=label)
    return PhenotypicFeature(type=ontology_class)

scramble_factor_proportions(phenotypic_features)

Calculate the number of HPO terms to scramble, based on the scramble factor.

Parameters:

Name Type Description Default
phenotypic_features list[PhenotypicFeature]

List of phenotypic features.

required

Returns:

Name Type Description
int int

The calculated number of phenotypic features to be scrambled.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def scramble_factor_proportions(self, phenotypic_features: list[PhenotypicFeature]) -> int:
    """
    Work out how many HPO terms to scramble for the given profile.

    Args:
        phenotypic_features (list[PhenotypicFeature]): List of phenotypic features.

    Returns:
        int: The number of phenotypic features to scramble. A single-feature profile
        always yields 1; otherwise the feature count times the scramble factor, rounded.
    """
    feature_count = len(phenotypic_features)
    if feature_count != 1:
        return int(round(feature_count * self.scramble_factor, 0))
    return 1

load_ontology(local_cached_ontology=None)

Load the Human Phenotype Ontology (HPO).

Args: local_cached_ontology (Path): Path to the local cached ontology.

Returns: ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.

Source code in src/pheval/prepare/create_noisy_phenopackets.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
    """
    Load the Human Phenotype Ontology (HPO).

    Args:
        local_cached_ontology (Path): Path to the local cached ontology. When omitted,
            the default ``hp.obo`` resource is requested as a non-local resource.

    Returns:
        ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.
    """
    if local_cached_ontology is not None:
        logger.info(f"Loading local ontology from {local_cached_ontology}.")
        resource = OntologyResource(slug=local_cached_ontology, local=True)
    else:
        logger.warning("No local cached ontology found, using default ontology.")
        resource = OntologyResource(slug="hp.obo", local=False)
    return ProntoImplementation(resource)

scramble_phenopackets(output_dir, phenopacket_path, phenopacket_dir, scramble_factor, local_cached_ontology)

Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets.

Parameters:

Name Type Description Default
output_dir Path

The directory to store the output scrambled Phenopackets.

required
phenopacket_path Path

The path to a single Phenopacket file (if applicable).

required
phenopacket_dir Path

The directory containing multiple Phenopacket files (if applicable).

required
scramble_factor float

A factor determining the level of scrambling for phenotypic features.

required
local_cached_ontology Path

The path to the local cached ontology.

required
Source code in src/pheval/prepare/create_noisy_phenopackets.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
def scramble_phenopackets(
    output_dir: Path,
    phenopacket_path: Path,
    phenopacket_dir: Path,
    scramble_factor: float,
    local_cached_ontology: Path,
) -> None:
    """
    Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets.

    If ``phenopacket_path`` is given it takes precedence over ``phenopacket_dir``.
    If neither is given, a warning is logged and nothing is scrambled.

    Args:
        output_dir (Path): The directory to store the output scrambled Phenopackets.
            It is created, including missing parents, if it does not exist.
        phenopacket_path (Path): The path to a single Phenopacket file (if applicable).
        phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable).
        scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
        local_cached_ontology (Path): The path to the local cached ontology.
    """
    start_time = time.perf_counter()
    logger.info("Initiating scrambling.")
    # Create the directory before reporting it, so the log never claims a failed creation.
    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"Created directory {output_dir}.")
    logger.info(f"Scramble factor set to {scramble_factor}.")
    if phenopacket_path is None and phenopacket_dir is None:
        # Avoid loading the ontology when there is no input to process.
        logger.warning("No phenopacket path or directory supplied, nothing to scramble.")
        return
    randomiser = HpoRandomiser(load_ontology(local_cached_ontology), scramble_factor)
    if phenopacket_path is not None:
        logger.info(f"Scrambling {phenopacket_path}.")
        randomiser.create_scrambled_phenopacket(output_dir, phenopacket_path)
    else:
        # Count with the same ".json" filter that create_scrambled_phenopackets applies,
        # so the reported number matches the files that are actually scrambled.
        number_of_phenopackets = sum(1 for _ in files_with_suffix(phenopacket_dir, ".json"))
        logger.info(f"Scrambling {number_of_phenopackets} phenopackets in {phenopacket_dir}.")
        randomiser.create_scrambled_phenopackets(output_dir, phenopacket_dir)
    logger.info(f"Finished scrambling! Total time: {time.perf_counter() - start_time:.2f} seconds.")