Skip to content

HgvsVariant

Bases: Variant

This encapsulates variant data that we retrieve from Variant Validator

Parameters:

Name Type Description Default
assembly str

the genome build (one of hg19, hg38)

required
vcf_d Dict[str]

dictionary with values for chr, pos, ref, alt (VCF)

required
symbol str, optional

the gene symbol

None
hgnc str, optional

The Human Gene Nomenclature Comittee (HNGC) identifier, e.g. HGNS:123

None
transcript str, optional

identifier of the transcript for defininf the variant

None
g_hgvs str, optional

genomic hgvs

None
variant_id str, optional

variant identifier

None
Source code in pyphetools/creation/hgvs_variant.py
class HgvsVariant(Variant):
    """
    This encapsulates variant data that we retrieve from Variant Validator

    :param assembly: the genome build (one of hg19, hg38)
    :type assembly: str
    :param vcf_d: dictionary with values for chr, pos, ref, alt (VCF)
    :type vcf_d: Dict[str]
    :param symbol: the gene symbol
    :type symbol: str, optional
    :param hgnc: The Human Gene Nomenclature Comittee (HNGC) identifier, e.g. HGNS:123
    :type hgnc: str, optional
    :param transcript: identifier of the transcript for defininf the variant
    :type transcript: str, optional
    :param g_hgvs: genomic hgvs
    :type g_hgvs: str, optional
    :param variant_id: variant identifier
    :type variant_id: str, optional
    """
    def __init__(self, assembly, vcf_d, symbol=None, hgnc=None, hgvs=None, transcript=None, g_hgvs=None,
                 variant_id=None) -> None:
        super().__init__()
        if not assembly in ACCEPTABLE_GENOMES:
            raise ValueError(f"Malformed assembly: \"{assembly}\"")
        self._assembly = assembly
        if not isinstance(vcf_d, dict):
            raise ValueError(f"vcf_d argument must be dictionary")
        self._chr = vcf_d.get('chr')
        self._position = int(vcf_d.get('pos'))
        self._ref = vcf_d.get('ref')
        self._alt = vcf_d.get('alt')
        self._symbol = symbol
        self._hgnc_id = hgnc
        self._hgvs = hgvs
        self._transcript = transcript
        self._g_hgvs = g_hgvs
        self._genotype = None
        if variant_id is None:
            self._variant_id = "var_" + "".join(random.choices(string.ascii_letters, k=25))
        else:
            self._variant_id = variant_id

    @property
    def assembly(self):
        return self._assembly

    @property
    def chr(self):
        return self._chr

    @property
    def position(self):
        return self._position

    @property
    def ref(self):
        return self._ref

    @property
    def alt(self):
        return self._alt

    @property
    def genotype(self):
        return self._genotype

    def __str__(self):
        return f"{self._hgvs}({self._chr}:{self._position}{self._ref}>{self._alt})"

    def to_string(self):
        return self.__str__()

    def to_ga4gh_variant_interpretation(self, acmg=None):
        """For the new interface. Refactor client code to use this function, which has an unambiguous name
        """
        return self.to_ga4gh(acmg=acmg)

    def to_ga4gh(self, acmg=None):
        """
        Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema
        """
        vdescriptor = phenopackets.VariationDescriptor()
        vdescriptor.id = self._variant_id
        if self._hgnc_id is not None and self._symbol is not None:
            vdescriptor.gene_context.value_id = self._hgnc_id
            vdescriptor.gene_context.symbol = self._symbol
        hgvs_expression = phenopackets.Expression()
        if self._hgvs is not None:
            hgvs_expression.syntax = "hgvs.c"
            hgvs_expression.value = self._hgvs
            vdescriptor.expressions.append(hgvs_expression)
        if self._g_hgvs is not None:
            hgvs_expression.syntax = "hgvs.g"
            hgvs_expression.value = self._g_hgvs
            vdescriptor.expressions.append(hgvs_expression)
        vdescriptor.molecule_context =  phenopackets.MoleculeContext.genomic
        if self._genotype is not None:
            if self._genotype == 'heterozygous':
                vdescriptor.allelic_state.id = "GENO:0000135"
                vdescriptor.allelic_state.label = "heterozygous"
            elif self._genotype == 'homozygous':
                vdescriptor.allelic_state.id = "GENO:0000136"
                vdescriptor.allelic_state.label = "homozygous"
            elif self._genotype == 'hemizygous':
                vdescriptor.allelic_state.id = "GENO:0000134"
                vdescriptor.allelic_state.label = "hemizygous"
            else:
                print(f"Did not recognize genotype {self._genotype}")
        vinterpretation = phenopackets.VariantInterpretation()
        if acmg is not None:
            if acmg.lower() == 'benign':
                vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.BENIGN
            elif acmg.lower == 'likely benign' or acmg.lower() == 'likely_benign':
                vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.LIKELY_BENIGN
            elif acmg.lower == 'uncertain significance' or acmg.lower() == 'uncertain_significance':
                vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.UNCERTAIN_SIGNIFICANCE
            elif acmg.lower == 'likely pathogenic' or acmg.lower() == 'likely_pathogenic':
                vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.LIKELY_PATHOGENIC
            elif acmg.lower == 'pathogenic' or acmg.lower() == 'pathogenic':
                vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.PATHOGENIC
            else:
                print(f"Warning- did not recognize ACMG category {acmg}")
        vcf_record = phenopackets.VcfRecord()
        vcf_record.genome_assembly =  self._assembly
        vcf_record.chrom = self._chr
        vcf_record.pos = self._position
        vcf_record.ref = self._ref
        vcf_record.alt = self._alt
        vdescriptor.vcf_record.CopyFrom(vcf_record)
        vinterpretation.variation_descriptor.CopyFrom(vdescriptor)
        return vinterpretation

    def to_variant_interpretation_202(self, 
                                      acmg:str=None) -> VariantInterpretation202:
        """
        Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema
        """

        vcf_record = VcfRecord202(genome_assembly=self._assembly,
                                  chrom=self._chr,
                                  pos=self._position,
                                  ref=self._ref,
                                  alt=self._alt)
        vdescriptor = VariationDescriptor202(id=self._variant_id, vcf_record=vcf_record, molecule_context=MoleculeContext202.genomic)
        if self._hgnc_id is not None and self._symbol is not None:
            gene_descriptor = GeneDescriptor202(value_id=self._hgnc_id, symbol=self._symbol)
            vdescriptor.gene_context = gene_descriptor
        if self._hgvs is not None:
            hgvs_expression = Expression202(syntax="hgvs.c", value=self._hgvs)
            vdescriptor.expressions.append(hgvs_expression)
        if self._g_hgvs is not None:
            hgvs_expression = Expression202(syntax="hgvs.g", value=self._g_hgvs)
            vdescriptor.expressions.append(hgvs_expression)
        gt_term = Variant._get_genotype_term(self._genotype)
        # it can occur that the genotype is not set when we call this function (it will be set by calling code)
        # therefore it is not necessarily an error if the genotype is None, calling code needs to check this appropriately
        if gt_term is not None:
            vdescriptor.allelic_state = gt_term
        vinterpretation = VariantInterpretation202(variation_descriptor=vdescriptor)
        acmg_code = Variant._get_acmg_classification(acmg=acmg)
        if acmg_code is not None:
            vinterpretation.acmg_pathogenicity_classification = acmg_code
        else:
            print(f"Warning- did not recognize ACMG category {acmg}")

        return vinterpretation

to_ga4gh(acmg=None)

Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema

Source code in pyphetools/creation/hgvs_variant.py
def to_ga4gh(self, acmg=None):
    """
    Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema
    """
    vdescriptor = phenopackets.VariationDescriptor()
    vdescriptor.id = self._variant_id
    if self._hgnc_id is not None and self._symbol is not None:
        vdescriptor.gene_context.value_id = self._hgnc_id
        vdescriptor.gene_context.symbol = self._symbol
    hgvs_expression = phenopackets.Expression()
    if self._hgvs is not None:
        hgvs_expression.syntax = "hgvs.c"
        hgvs_expression.value = self._hgvs
        vdescriptor.expressions.append(hgvs_expression)
    if self._g_hgvs is not None:
        hgvs_expression.syntax = "hgvs.g"
        hgvs_expression.value = self._g_hgvs
        vdescriptor.expressions.append(hgvs_expression)
    vdescriptor.molecule_context =  phenopackets.MoleculeContext.genomic
    if self._genotype is not None:
        if self._genotype == 'heterozygous':
            vdescriptor.allelic_state.id = "GENO:0000135"
            vdescriptor.allelic_state.label = "heterozygous"
        elif self._genotype == 'homozygous':
            vdescriptor.allelic_state.id = "GENO:0000136"
            vdescriptor.allelic_state.label = "homozygous"
        elif self._genotype == 'hemizygous':
            vdescriptor.allelic_state.id = "GENO:0000134"
            vdescriptor.allelic_state.label = "hemizygous"
        else:
            print(f"Did not recognize genotype {self._genotype}")
    vinterpretation = phenopackets.VariantInterpretation()
    if acmg is not None:
        if acmg.lower() == 'benign':
            vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.BENIGN
        elif acmg.lower == 'likely benign' or acmg.lower() == 'likely_benign':
            vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.LIKELY_BENIGN
        elif acmg.lower == 'uncertain significance' or acmg.lower() == 'uncertain_significance':
            vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.UNCERTAIN_SIGNIFICANCE
        elif acmg.lower == 'likely pathogenic' or acmg.lower() == 'likely_pathogenic':
            vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.LIKELY_PATHOGENIC
        elif acmg.lower == 'pathogenic' or acmg.lower() == 'pathogenic':
            vinterpretation.acmgPathogenicityClassification = phenopackets.AcmgPathogenicityClassification.PATHOGENIC
        else:
            print(f"Warning- did not recognize ACMG category {acmg}")
    vcf_record = phenopackets.VcfRecord()
    vcf_record.genome_assembly =  self._assembly
    vcf_record.chrom = self._chr
    vcf_record.pos = self._position
    vcf_record.ref = self._ref
    vcf_record.alt = self._alt
    vdescriptor.vcf_record.CopyFrom(vcf_record)
    vinterpretation.variation_descriptor.CopyFrom(vdescriptor)
    return vinterpretation

to_ga4gh_variant_interpretation(acmg=None)

For the new interface. Refactor client code to use this function, which has an unambiguous name

Source code in pyphetools/creation/hgvs_variant.py
def to_ga4gh_variant_interpretation(self, acmg=None):
    """For the new interface. Refactor client code to use this function, which has an unambiguous name
    """
    return self.to_ga4gh(acmg=acmg)

to_variant_interpretation_202(acmg=None)

Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema

Source code in pyphetools/creation/hgvs_variant.py
def to_variant_interpretation_202(self, 
                                  acmg:str=None) -> VariantInterpretation202:
    """
    Transform this Variant object into a "variantInterpretation" message of the GA4GH Phenopacket schema
    """

    vcf_record = VcfRecord202(genome_assembly=self._assembly,
                              chrom=self._chr,
                              pos=self._position,
                              ref=self._ref,
                              alt=self._alt)
    vdescriptor = VariationDescriptor202(id=self._variant_id, vcf_record=vcf_record, molecule_context=MoleculeContext202.genomic)
    if self._hgnc_id is not None and self._symbol is not None:
        gene_descriptor = GeneDescriptor202(value_id=self._hgnc_id, symbol=self._symbol)
        vdescriptor.gene_context = gene_descriptor
    if self._hgvs is not None:
        hgvs_expression = Expression202(syntax="hgvs.c", value=self._hgvs)
        vdescriptor.expressions.append(hgvs_expression)
    if self._g_hgvs is not None:
        hgvs_expression = Expression202(syntax="hgvs.g", value=self._g_hgvs)
        vdescriptor.expressions.append(hgvs_expression)
    gt_term = Variant._get_genotype_term(self._genotype)
    # it can occur that the genotype is not set when we call this function (it will be set by calling code)
    # therefore it is not necessarily an error if the genotype is None, calling code needs to check this appropriately
    if gt_term is not None:
        vdescriptor.allelic_state = gt_term
    vinterpretation = VariantInterpretation202(variation_descriptor=vdescriptor)
    acmg_code = Variant._get_acmg_classification(acmg=acmg)
    if acmg_code is not None:
        vinterpretation.acmg_pathogenicity_classification = acmg_code
    else:
        print(f"Warning- did not recognize ACMG category {acmg}")

    return vinterpretation