Skip to content

variant_index

gentropy.variant_index.VariantIndexStep

Run the variant index step, restricting the index to only those variants present in study-locus sets.

Using a VariantAnnotation dataset as a reference, this step creates and writes a dataset of the type VariantIndex. The resulting dataset includes only variants that have disease-association data, and it carries a reduced set of annotations.

Source code in src/gentropy/variant_index.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class VariantIndexStep:
    """Build a variant index restricted to variants present in study-locus sets.

    A `VariantAnnotation` dataset serves as the reference. From it, this step derives and writes a `VariantIndex` dataset that keeps only the variants that have disease-association data, together with a reduced set of annotations.
    """

    def __init__(
        self: VariantIndexStep,
        session: Session,
        variant_annotation_path: str,
        credible_set_path: str,
        variant_index_path: str,
    ) -> None:
        """Read both inputs, derive the variant index and write it as parquet.

        Args:
            session (Session): Session object; its `write_mode` is used as the output save mode.
            variant_annotation_path (str): Path the variant annotation dataset is read from.
            credible_set_path (str): Path the credible set dataset is read from.
            variant_index_path (str): Path the variant index dataset is written to.
        """
        # Extract
        annotation = VariantAnnotation.from_parquet(session, variant_annotation_path)
        # NOTE(review): recursiveFileLookup is presumably forwarded to the Spark
        # reader so that nested directories are scanned - confirm in from_parquet.
        study_loci = StudyLocus.from_parquet(
            session,
            credible_set_path,
            recursiveFileLookup=True,
        )

        # Transform
        variant_index = VariantIndex.from_variant_annotation(annotation, study_loci)

        # Load: one output partition directory per value of the "chromosome" column
        writer = variant_index.df.write.partitionBy("chromosome")
        writer = writer.mode(session.write_mode)
        writer.parquet(variant_index_path)

__init__(session: Session, variant_annotation_path: str, credible_set_path: str, variant_index_path: str) -> None

Run VariantIndex step.

Parameters:

Name Type Description Default
session Session

Session object.

required
variant_annotation_path str

Variant annotation dataset path.

required
credible_set_path str

Credible set dataset path.

required
variant_index_path str

Variant index dataset path.

required
Source code in src/gentropy/variant_index.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self: VariantIndexStep,
    session: Session,
    variant_annotation_path: str,
    credible_set_path: str,
    variant_index_path: str,
) -> None:
    """Execute the VariantIndex step from reading the inputs to writing the output.

    Args:
        session (Session): Session object; its `write_mode` is used as the output save mode.
        variant_annotation_path (str): Path the variant annotation dataset is read from.
        credible_set_path (str): Path the credible set dataset is read from.
        variant_index_path (str): Path the variant index dataset is written to.
    """
    # Extract: load the reference annotations and the credible sets
    reference_annotation = VariantAnnotation.from_parquet(
        session, variant_annotation_path
    )
    credible_sets = StudyLocus.from_parquet(
        session, credible_set_path, recursiveFileLookup=True
    )

    # Transform: derive the index from the annotations and the credible sets
    index = VariantIndex.from_variant_annotation(reference_annotation, credible_sets)

    # Load: write parquet partitioned by the "chromosome" column
    partitioned_writer = index.df.write.partitionBy("chromosome")
    configured_writer = partitioned_writer.mode(session.write_mode)
    configured_writer.parquet(variant_index_path)