Skip to content

ThresholdedColumnMapper

Bases: ColumnMapper

Source code in pyphetools/creation/thresholded_column_mapper.py
class ThresholdedColumnMapper(ColumnMapper):
    """Column mapper that delegates the mapping of each cell to a Thresholder.

    Each cell is handed to the thresholder's ``map_value`` method; a non-None
    result is treated as the single HpTerm for that cell.
    """

    def __init__(self, column_name, thresholder:Thresholder):
        """Create a mapper for one column.

        :param column_name: name of the column to map; forwarded to the
            ColumnMapper constructor
        :param thresholder: object whose ``map_value`` method is used to map
            the contents of each cell
        """
        super().__init__(column_name=column_name)
        self._thresholder = thresholder

    def map_cell(self, cell_contents) -> List[HpTerm]:
        """Map the contents of a single cell.

        :param cell_contents: value passed unchanged to the thresholder
        :returns: a one-element list with the term returned by the
            thresholder, or an empty list if the thresholder returned None
        """
        hpterm = self._thresholder.map_value(cell_contents=cell_contents)
        if hpterm is not None:
            return [hpterm]
        return []

    def preview_column(self, df:pd.DataFrame):
        """Summarize how the cells of this mapper's column would be mapped.

        Every value of the column is converted to ``str`` and mapped with
        :meth:`map_cell`. Cells that yield no term are ignored; the others
        are tallied by the text "<hpo_term_and_id>: <display_value>".

        :param df: data frame containing the column named by
            ``self._column_name`` (presumably set by the ColumnMapper
            constructor - confirm against the base class)
        :returns: a DataFrame with one row per distinct mapping, holding the
            mapping text (in a column whose name includes the thresholder's
            reference range) and its count as a string
        :raises ValueError: if ``df`` is not a pandas DataFrame
        """
        if not isinstance(df, pd.DataFrame):
            # Must be an f-string and must report ``df``: the column has not
            # been extracted yet at this point.
            raise ValueError(f"df argument must be pandas DataFrame, but was {type(df)}")
        column = df[self._column_name]
        mapping_counter = defaultdict(int)
        for _, value in column.items():
            results = self.map_cell(str(value))
            if len(results) > 0:
                hpterm = results[0]
                mapped = f"{hpterm.hpo_term_and_id}: {hpterm.display_value}"
                mapping_counter[mapped] += 1
        colname = f"mapping: {self._thresholder.get_reference_range()}"
        dlist = [{colname: k, "count": str(v)} for k, v in mapping_counter.items()]
        return pd.DataFrame(dlist)