Map a column that contains information about the age of individuals.
Tables with information about genotype-phenotype correlations typically
contain a column with information about age. The columns often have formats
such as 34 (an integer giving the number of years or months) or 3Y2M (for three years
and two months). This mapper ingests data from such columns and transforms the
contents into ISO 8601 strings (e.g., P4Y2M1D for 4 years, 2 months, and 1 day).
There are many different formats to be parsed, and so we have a small class hierarchy.
Source code in pyphetools/creation/age_column_mapper.py
class AgeColumnMapper(metaclass=abc.ABCMeta):
    """
    Map a column that contains information about the age of individuals.

    Tables with information about genotype-phenotype correlations typically
    contain a column with information about age. The columns often have formats
    such as 34 (integer with number of years or months) or 3Y2M (for three years
    and two months). This mapper ingests data from such columns and transforms the
    contents into ISO 8601 strings (e.g., P4Y2M1D for 4 years, 2 months, and 1 day).
    There are many different formats to be parsed, and so we have a small class
    hierarchy: this abstract base class holds the shared state and the factory
    methods, and each concrete subclass implements ``map_cell``.
    """

    def __init__(self, column_name: str, string_to_iso_d=None) -> None:
        """
        :param column_name: Name of the Age column in the original table
        :type column_name: str
        :param string_to_iso_d: dictionary from free text (input table) to ISO8601 strings
        :type string_to_iso_d: Dict[str,str], optional
        :raises ValueError: if column_name is None
        """
        if column_name is None:
            raise ValueError("Must provide non-null column_name argument")
        if string_to_iso_d is None:
            # A fresh dictionary per instance; never share a mutable default.
            string_to_iso_d = {}
        self._column_name = column_name
        self._string_to_iso_d = string_to_iso_d
        # Maps an unparseable input to the number of times it was seen. This base
        # class only reads it (has_error/error_summary); presumably subclasses
        # increment it from map_cell -- confirm against the subclasses.
        self._erroneous_input_counter = defaultdict(int)

    @abc.abstractmethod
    def map_cell(self, cell_contents) -> "PyPheToolsAge":
        """
        Map a single cell of the table

        Abstract: every concrete subclass must provide its own implementation.

        :param cell_contents: The text contained in a single cell of the table
        :type cell_contents: can be a string or numerical type
        """
        pass

    def _clean_contents(self, cell_contents):
        """Return cell_contents converted to str with surrounding whitespace removed."""
        return str(cell_contents).strip()

    def preview_column(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Show how the cells of this mapper's column are mapped to age strings.

        Every cell is passed through ``map_cell`` (as a string); the result has
        one row per distinct original cell text, in order of first appearance.

        :param df: table that contains the column named by ``get_column_name()``
        :type df: pd.DataFrame
        :returns: DataFrame with the columns "original column contents" and "age"
        :rtype: pd.DataFrame
        :raises ValueError: if df is not a pandas DataFrame
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError(f"df argument must be pandas DataFrame, but was {type(df)}")
        column = df[self.get_column_name()]
        # Keyed by the text form of the cell so that unhashable cell values cannot
        # break the preview; the value keeps the raw cell for display.
        preview_d = {}
        for _, column_contents in column.items():
            cell_text = str(column_contents)
            pyphetools_age = self.map_cell(cell_text)
            preview_d[cell_text] = (column_contents, pyphetools_age.age_string)
        dlist = [
            {"original column contents": original, "age": "n/a" if age is None else age}
            for original, age in preview_d.values()
        ]
        return pd.DataFrame(dlist)

    def get_column_name(self) -> str:
        """Return the name of the age column passed to the constructor."""
        return self._column_name

    def has_error(self) -> bool:
        """Return True if at least one erroneous input has been recorded."""
        return len(self._erroneous_input_counter) > 0

    def error_summary(self):
        """Return a one-line message listing each recorded erroneous input with its count."""
        items = [f"{k} (n={v})" for k, v in self._erroneous_input_counter.items()]
        return f"Could not parse the following as ISO8601 ages: {', '.join(items)}"

    @staticmethod
    def not_provided():
        """Create an object for cases where Age is not provided.
        """
        return NotProvidedAgeColumnMapper(column_name=Constants.NOT_PROVIDED)

    @staticmethod
    def by_year(column_name) -> "AgeColumnMapper":
        """Return a YearAgeColumnMapper for the column called column_name."""
        return YearAgeColumnMapper(column_name=column_name)

    @staticmethod
    def by_year_and_month(column_name) -> "AgeColumnMapper":
        """Return a YearMonthAgeColumnMapper for the column called column_name."""
        return YearMonthAgeColumnMapper(column_name=column_name)

    @staticmethod
    def by_month(column_name) -> "AgeColumnMapper":
        """Return a MonthAgeColumnMapper for the column called column_name."""
        return MonthAgeColumnMapper(column_name=column_name)

    @staticmethod
    def iso8601(column_name) -> "AgeColumnMapper":
        """Return an Iso8601AgeColumnMapper for the column called column_name."""
        return Iso8601AgeColumnMapper(column_name=column_name)

    @staticmethod
    def hpo_onset(column_name) -> "AgeColumnMapper":
        """Return an HpoAgeColumnMapper for the column called column_name."""
        return HpoAgeColumnMapper(column_name=column_name)

    @staticmethod
    def custom_dictionary(column_name, string_to_iso_d):
        """
        Create an AgeColumnMapper for free text input data such as Fetus, 1.5, birth, 51 days

        :param column_name: name of the age column in the input table
        :type column_name: str
        :param string_to_iso_d: dictionary with free text to ISO 8601
        :type string_to_iso_d: Dict[str,str]
        """
        return CustomAgeColumnMapper(column_name=column_name,
                                     string_to_iso_d=string_to_iso_d)
|
Parameters:
Name |
Type |
Description |
Default |
column_name
|
str
|
Name of the Age column in the original table
|
required
|
string_to_iso_d
|
Dict[str,str], optional
|
dictionary from free text (input table) to ISO8601 strings
|
None
|
Source code in pyphetools/creation/age_column_mapper.py
| def __init__(self, column_name: str, string_to_iso_d=None) -> None:
"""
:param column_name: Name of the Age column in the original table
:type column_name: str
:param string_to_iso_d: dictionary from free text (input table) to ISO8601 strings
:type string_to_iso_d: Dict[str,str], optional
"""
if string_to_iso_d is None:
string_to_iso_d = {}
if column_name is None:
raise ValueError("Must provide non-null column_name argument")
self._column_name = column_name
self._string_to_iso_d = string_to_iso_d
self._erroneous_input_counter = defaultdict(int)
|
Create an AgeColumnMapper for free text input data such as Fetus, 1.5, birth, 51 days
Parameters:
Name |
Type |
Description |
Default |
column_name
|
str
|
name of the age column in the input table
|
required
|
string_to_iso_d
|
Dict[str,str]
|
dictionary with free text to ISO 8601
|
required
|
Source code in pyphetools/creation/age_column_mapper.py
@staticmethod
def custom_dictionary(column_name, string_to_iso_d):
    """
    Build a CustomAgeColumnMapper for free-text age data such as Fetus, 1.5, birth, 51 days.

    :param column_name: name of the age column in the input table
    :type column_name: str
    :param string_to_iso_d: dictionary with free text to ISO 8601
    :type string_to_iso_d: Dict[str,str]
    :returns: a CustomAgeColumnMapper constructed from the two arguments
    """
    mapper = CustomAgeColumnMapper(
        column_name=column_name,
        string_to_iso_d=string_to_iso_d,
    )
    return mapper
|
Map a single cell of the table
Parameters:
Name |
Type |
Description |
Default |
cell_contents
|
can be a string or numerical type
|
The text contained in a single cell of the table
|
required
|
Source code in pyphetools/creation/age_column_mapper.py
@abc.abstractmethod
def map_cell(self, cell_contents) -> PyPheToolsAge:
    """
    Convert the contents of one table cell.

    Declared abstract, so each concrete subclass supplies the implementation;
    this base version does nothing and returns None.

    :param cell_contents: The text contained in a single cell of the table
    :type cell_contents: can be a string or numerical type
    """
|
Create an object for cases where Age is not provided.
Source code in pyphetools/creation/age_column_mapper.py
@staticmethod
def not_provided():
    """Return a NotProvidedAgeColumnMapper, for cases where Age is not provided."""
    # The placeholder constant stands in for a real column name.
    placeholder_column = Constants.NOT_PROVIDED
    return NotProvidedAgeColumnMapper(column_name=placeholder_column)
|