Skip to content

AgeColumnMapper

Map a column that contains information about the age of individuals.

Tables with information about genotype-phenotype correlations typically contain a column with information about age. The columns often have formats such as 34 (an integer giving the number of years or months) or 3Y2M (for three years and two months). This mapper ingests data from such columns and transforms the contents into ISO 8601 strings (e.g., P4Y2M1D for 4 years, 2 months, and 1 day).

There are many different formats to be parsed, and so we have a small class hierarchy:

Source code in pyphetools/creation/age_column_mapper.py
class AgeColumnMapper(metaclass=abc.ABCMeta):
    """
    Map a column that contains information about the age of individuals.

    Tables with information about genotype-phenotype correlations typically
    contain a column with information about age. The columns often have formats
    such as 34 (integer with number of years or months) or 3Y2M (for three years
    and two months). This mapper ingests data from such columns and transforms the
    contents into ISO 8601 strings (e.g., P4Y2M1D for 4 years, 2 months, and 1 day).

    There are many different formats to be parsed and so we have a small class
    hierarchy. This abstract base class holds the shared state and offers static
    factory methods that create the concrete mappers.
    """

    def __init__(self, column_name: str, string_to_iso_d=None) -> None:
        """
        :param column_name: Name of the Age column in the original table
        :type column_name: str
        :param string_to_iso_d: dictionary from free text (input table) to ISO8601 strings
        :type string_to_iso_d: Dict[str,str], optional
        :raises ValueError: if column_name is None
        """
        if column_name is None:
            raise ValueError("Must provide non-null column_name argument")
        # Use a fresh dict per instance rather than a shared mutable default.
        if string_to_iso_d is None:
            string_to_iso_d = {}
        self._column_name = column_name
        self._string_to_iso_d = string_to_iso_d
        # Counts of unparseable inputs; read by has_error() and error_summary().
        self._erroneous_input_counter = defaultdict(int)

    @abc.abstractmethod
    def map_cell(self, cell_contents) -> "PyPheToolsAge":
        """
        Map a single cell of the table

        :param cell_contents: The text contained in a single cell of the table
        :type cell_contents: can be a string or numerical type
        """
        pass

    def _clean_contents(self, cell_contents):
        """Return the cell contents as a string with surrounding whitespace removed."""
        return str(cell_contents).strip()

    def preview_column(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Show how the distinct entries of the age column are mapped by this mapper.

        ``map_cell`` is called once for every cell of the column (as a string).
        The result has one row per distinct cell text, in order of first
        appearance.

        :param df: table that contains the column named by ``get_column_name()``
        :type df: pd.DataFrame
        :returns: data frame with the columns "original column contents" and "age";
            "age" is "n/a" when the mapped age string is None
        :rtype: pd.DataFrame
        :raises ValueError: if df is not a pandas DataFrame
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError(f"df argument must be pandas DataFrame, but was {type(df)}")
        column = df[self.get_column_name()]
        # Key on the cell text so that different inputs that happen to map to
        # the same age each keep their own row in the preview.
        preview_d = {}
        for _, column_contents in column.items():
            cell_text = str(column_contents)
            pyphetools_age = self.map_cell(cell_text)
            preview_d[cell_text] = (column_contents, pyphetools_age.age_string)
        dlist = [
            {
                "original column contents": original,
                "age": "n/a" if age_string is None else age_string,
            }
            for original, age_string in preview_d.values()
        ]
        return pd.DataFrame(dlist)

    def get_column_name(self) -> str:
        """Return the name of the age column this mapper was created for."""
        return self._column_name

    def has_error(self) -> bool:
        """Return True if at least one erroneous input has been counted."""
        return len(self._erroneous_input_counter) > 0

    def error_summary(self):
        """Return a one-line message listing each erroneous input with its count."""
        items = [f"{k} (n={v})" for k, v in self._erroneous_input_counter.items()]
        return f"Could not parse the following as ISO8601 ages: {', '.join(items)}"

    @staticmethod
    def not_provided():
        """Create an object for cases where Age is not provided.
        """
        return NotProvidedAgeColumnMapper(column_name=Constants.NOT_PROVIDED)

    @staticmethod
    def by_year(column_name) -> "AgeColumnMapper":
        """Create a YearAgeColumnMapper for the column ``column_name``."""
        return YearAgeColumnMapper(column_name=column_name)

    @staticmethod
    def by_year_and_month(column_name) -> "AgeColumnMapper":
        """Create a YearMonthAgeColumnMapper for the column ``column_name``."""
        return YearMonthAgeColumnMapper(column_name=column_name)

    @staticmethod
    def by_month(column_name) -> "AgeColumnMapper":
        """Create a MonthAgeColumnMapper for the column ``column_name``."""
        return MonthAgeColumnMapper(column_name=column_name)

    @staticmethod
    def iso8601(column_name) -> "AgeColumnMapper":
        """Create an Iso8601AgeColumnMapper for the column ``column_name``."""
        return Iso8601AgeColumnMapper(column_name=column_name)

    @staticmethod
    def hpo_onset(column_name) -> "AgeColumnMapper":
        """Create an HpoAgeColumnMapper for the column ``column_name``."""
        return HpoAgeColumnMapper(column_name=column_name)

    @staticmethod
    def custom_dictionary(column_name, string_to_iso_d) -> "AgeColumnMapper":
        """
        Create an AgeColumnMapper for free text input data such as Fetus, 1.5, birth, 51 days

        :param column_name: name of the age column in the input table
        :type column_name: str
        :param string_to_iso_d: dictionary with free text to ISO 8601
        :type string_to_iso_d: Dict[str,str]
        """
        return CustomAgeColumnMapper(column_name=column_name,
                            string_to_iso_d=string_to_iso_d)

__init__(column_name, string_to_iso_d=None)

Parameters:

Name Type Description Default
column_name str

Name of the Age column in the original table

required
string_to_iso_d Dict[str,str], optional

dictionary from free text (input table) to ISO8601 strings

None
Source code in pyphetools/creation/age_column_mapper.py
def __init__(self, column_name: str, string_to_iso_d=None) -> None:
    """
    Store the column name and the free-text lookup table, and start with an
    empty counter of erroneous inputs.

    :param column_name: Name of the Age column in the original table
    :type column_name: str
    :param string_to_iso_d: dictionary from free text (input table) to ISO8601 strings
    :type string_to_iso_d: Dict[str,str], optional
    :raises ValueError: if column_name is None
    """
    if column_name is None:
        raise ValueError("Must provide non-null column_name argument")
    self._column_name = column_name
    # An omitted dictionary becomes a new empty dict for this instance.
    self._string_to_iso_d = {} if string_to_iso_d is None else string_to_iso_d
    self._erroneous_input_counter = defaultdict(int)

custom_dictionary(column_name, string_to_iso_d) staticmethod

Create an AgeColumnMapper for free text input data such as Fetus, 1.5, birth, 51 days

Parameters:

Name Type Description Default
column_name str

name of the age column in the input table

required
string_to_iso_d Dict[str,str]

dictionary with free text to ISO 8601

required
Source code in pyphetools/creation/age_column_mapper.py
@staticmethod
def custom_dictionary(column_name, string_to_iso_d):
    """
    Build a CustomAgeColumnMapper for free text input data such as
    Fetus, 1.5, birth, 51 days.

    :param column_name: name of the age column in the input table
    :type column_name: str
    :param string_to_iso_d: dictionary with free text to ISO 8601
    :type string_to_iso_d: Dict[str,str]
    :returns: the newly created CustomAgeColumnMapper
    """
    mapper = CustomAgeColumnMapper(
        column_name=column_name,
        string_to_iso_d=string_to_iso_d,
    )
    return mapper

map_cell(cell_contents) abstractmethod

Map a single cell of the table

Parameters:

Name Type Description Default
cell_contents can be a string | numerical type

The text contained in a single cell of the table

required
Source code in pyphetools/creation/age_column_mapper.py
@abc.abstractmethod
def map_cell(self, cell_contents) -> PyPheToolsAge:
    """
    Map a single cell of the table.

    This is the abstract hook of the mapper; the base implementation does
    nothing and returns None.

    :param cell_contents: The text contained in a single cell of the table
    :type cell_contents: can be a string or numerical type
    """
    return None

not_provided() staticmethod

Create an object for cases where Age is not provided.

Source code in pyphetools/creation/age_column_mapper.py
@staticmethod
def not_provided():
    """Create a NotProvidedAgeColumnMapper for cases where Age is not provided.

    The mapper's column name is set to Constants.NOT_PROVIDED.
    """
    placeholder_column = Constants.NOT_PROVIDED
    return NotProvidedAgeColumnMapper(column_name=placeholder_column)