Source code for gpsea.model._gt

import abc
import bisect
import enum
import typing
from typing import Iterator

import numpy as np

from ._base import SampleLabels


[docs] class Genotype(enum.Enum): """ `Genotype` represents state of a variable locus in a diploid genome. """ NO_CALL = ('.',) HOMOZYGOUS_REFERENCE = ('0/0',) HETEROZYGOUS = ('0/1',) HOMOZYGOUS_ALTERNATE = ('1/1',) HEMIZYGOUS = ('1',) def __init__(self, code: str): self._code = code @property def code(self) -> str: return self._code def __str__(self): return self.code
[docs] class Genotypes(typing.Sized, typing.Iterable): """ `Genotypes` is a container for mapping between sample ID and its genotype. Let's consider a pair of samples: >>> a = SampleLabels('A') >>> b = SampleLabels('B') We can use one of the static methods to create an instance. Either a single genotype: >>> gt = Genotypes.single(a, Genotype.HETEROZYGOUS) or genotypes of several samples: >>> gts = Genotypes.from_mapping({a: Genotype.HETEROZYGOUS, b: Genotype.HOMOZYGOUS_ALTERNATE}) There are 2 genotypes in the container: >>> len(gts) 2 You can get a genotype for a sample ID: >>> g = gts.for_sample(a) >>> g.code '0/1' You will get `None` if the sample is not present: >>> gts.for_sample(SampleLabels('UNKNOWN')) You can iterate over sample-genotype pairs: >>> for sample_id, genotype in gts: ... print(sample_id, genotype) A 0/1 B 1/1 """
[docs] @staticmethod def empty(): return EMPTY
[docs] @staticmethod def single(sample_id: SampleLabels, genotype: Genotype): """ A shortcut for creating `Genotypes` for a single sample: >>> a = SampleLabels('A') >>> gts = Genotypes.single(a, Genotype.HOMOZYGOUS_ALTERNATE) >>> assert len(gts) == 1 >>> assert gts.for_sample(a) == Genotype.HOMOZYGOUS_ALTERNATE """ return Genotypes((sample_id,), (genotype,))
[docs] @staticmethod def from_mapping(mapping: typing.Mapping[SampleLabels, Genotype]): """ Create `Genotypes` from mapping between sample IDs and genotypes. >>> a = SampleLabels('A') >>> b = SampleLabels('B') >>> gts = Genotypes.from_mapping({a: Genotype.HETEROZYGOUS, b: Genotype.HOMOZYGOUS_ALTERNATE}) >>> assert len(gts) == 2 """ return Genotypes(*Genotypes._preprocess_mapping(mapping))
@staticmethod def _preprocess_mapping(genotypes: typing.Mapping[SampleLabels, Genotype]) -> typing.Tuple[typing.Sequence[str], typing.Sequence[Genotype]]: samples = np.empty(shape=(len(genotypes),), dtype=object) gts = np.empty(shape=(len(genotypes),), dtype=object) for i, (sample, gt) in enumerate(genotypes.items()): samples[i] = sample gts[i] = gt indices = np.argsort(samples) return tuple(samples[indices]), tuple(gts[indices]) def __init__(self, samples: typing.Iterable[SampleLabels], genotypes: typing.Iterable[Genotype]): self._samples = tuple(samples) self._gts = tuple(genotypes) if len(self._samples) != len(self._gts): raise ValueError(f'Mismatch between the sample and genotype count: {len(self._samples)} != {len(self._gts)}')
[docs] def for_sample(self, sample_id: SampleLabels) -> typing.Optional[Genotype]: """ Get a genotype for a sample or `None` if the genotype is not present. :param sample_id: a :class:`SampleLabels` with sample's identifier. """ idx = bisect.bisect_left(self._samples, sample_id) if idx != len(self._samples) and self._samples[idx] == sample_id: return self._gts[idx] return None
def __len__(self) -> int: return len(self._samples) def __iter__(self) -> Iterator[typing.Tuple[SampleLabels, Genotype]]: return zip(self._samples, self._gts) def __hash__(self): return hash((self._samples, self._gts)) def __eq__(self, other): return (isinstance(other, Genotypes) and self._samples == other._samples and self._gts == other._gts) def __str__(self): return f'Genotypes({["{}={}".format(sid, gt) for sid, gt in zip(self._samples, self._gts)]})' def __repr__(self): return f'Genotypes(samples={self._samples}, genotypes={self._gts})'
EMPTY = Genotypes((), ())
[docs] class Genotyped(metaclass=abc.ABCMeta): """ `Genotyped` entities """ @property @abc.abstractmethod def genotypes(self) -> Genotypes: pass
[docs] def genotype_for_sample(self, sample_id: SampleLabels) -> typing.Optional[Genotype]: """ Get a genotype for a sample or `None` if the genotype is not present. :param sample_id: a :class:`SampleLabels` with sample's identifier. """ return self.genotypes.for_sample(sample_id)