Skip to content

variant_annotation

gentropy.variant_annotation.VariantAnnotationStep

Variant annotation step.

The variant annotation step produces a dataset of type VariantAnnotation derived from gnomAD's gnomad.genomes.vX.X.X.sites.ht Hail table. This dataset is used to validate variants and as a source of annotation.

Source code in src/gentropy/variant_annotation.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class VariantAnnotationStep:
    """Step that builds and persists the variant annotation dataset.

    The step produces a `VariantAnnotation` dataset derived from gnomAD's
    `gnomad.genomes.vX.X.X.sites.ht` Hail table. The resulting dataset is used
    to validate variants and as a source of annotation.

    All work happens in the constructor: instantiating the class runs the step.
    """

    def __init__(self, session: Session, variant_annotation_path: str) -> None:
        """Run the variant annotation step and write its output as parquet.

        Args:
            session (Session): Session object; supplies the Spark context used
                to initialise Hail and the write mode used for the output.
            variant_annotation_path (str): Destination path of the variant
                annotation dataset.
        """
        # Hail must be attached to the session's Spark context before any
        # Hail-backed data is touched; the Hail log is sent to /dev/null.
        hl.init(sc=session.spark.sparkContext, log="/dev/null")

        # Build the annotation dataset from the gnomAD variants source.
        annotation = GnomADVariants().as_variant_annotation()

        # Persist the underlying dataframe as parquet, honouring the write
        # mode configured on the session.
        writer = annotation.df.write.mode(session.write_mode)
        writer.parquet(variant_annotation_path)

__init__(session: Session, variant_annotation_path: str) -> None

Run Variant Annotation step.

Parameters:

Name Type Description Default
session Session

Session object.

required
variant_annotation_path str

Variant annotation dataset path.

required
Source code in src/gentropy/variant_annotation.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def __init__(self, session: Session, variant_annotation_path: str) -> None:
    """Run the variant annotation step and write its output as parquet.

    Args:
        session (Session): Session object; supplies the Spark context used
            to initialise Hail and the write mode used for the output.
        variant_annotation_path (str): Destination path of the variant
            annotation dataset.
    """
    # Hail must be attached to the session's Spark context before any
    # Hail-backed data is touched; the Hail log is sent to /dev/null.
    hl.init(sc=session.spark.sparkContext, log="/dev/null")

    # Build the annotation dataset from the gnomAD variants source.
    annotation = GnomADVariants().as_variant_annotation()

    # Persist the underlying dataframe as parquet, honouring the write
    # mode configured on the session.
    writer = annotation.df.write.mode(session.write_mode)
    writer.parquet(variant_annotation_path)