Skip to content

ld_based_clumping

gentropy.ld_based_clumping.LDBasedClumpingStep

Step to perform LD-based clumping on study locus dataset.

As a first step, the study locus is enriched with population-specific linked variants, which is why the study index and the LD index are required for this step. Study loci that can be explained by a more significant association from the same study are flagged in the resulting dataset.

Source code in src/gentropy/ld_based_clumping.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class LDBasedClumpingStep:
    """LD-based clumping of a study locus dataset.

    The study locus is first enriched with population-specific linked variants,
    which is why both the study index and the LD index are required inputs.
    Study loci that can be explained by a more significant association from the
    same study are flagged in the resulting dataset.
    """

    def __init__(
        self,
        session: Session,
        study_locus_input_path: str,
        study_index_path: str,
        ld_index_path: str,
        clumped_study_locus_output_path: str,
    ) -> None:
        """Annotate the study locus with LD, clump it and write the result.

        Args:
            session (Session): Session object.
            study_locus_input_path (str): Path to the input study locus.
            study_index_path (str): Path to the study index.
            ld_index_path (str): Path to the LD index.
            clumped_study_locus_output_path (str): Path to the resulting clumped study-locus dataset.
        """
        # Load the three input datasets from parquet.
        loci = StudyLocus.from_parquet(session, study_locus_input_path)
        linkage = LDIndex.from_parquet(session, ld_index_path)
        studies = StudyIndex.from_parquet(session, study_index_path)

        # LD annotation has to happen before clumping.
        ld_annotated = loci.annotate_ld(studies, linkage)
        clumped = ld_annotated.clump()

        # Persist the clumped dataset using the session's configured write mode.
        writer = clumped.df.write.mode(session.write_mode)
        writer.parquet(clumped_study_locus_output_path)

__init__(session: Session, study_locus_input_path: str, study_index_path: str, ld_index_path: str, clumped_study_locus_output_path: str) -> None

Run LD-based clumping step.

Parameters:

Name Type Description Default
session Session

Session object.

required
study_locus_input_path str

Path to the input study locus.

required
study_index_path str

Path to the study index.

required
ld_index_path str

Path to the LD index.

required
clumped_study_locus_output_path str

Path to the resulting clumped study-locus dataset.

required
Source code in src/gentropy/ld_based_clumping.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def __init__(
    self,
    session: Session,
    study_locus_input_path: str,
    study_index_path: str,
    ld_index_path: str,
    clumped_study_locus_output_path: str,
) -> None:
    """Annotate the study locus with LD, clump it and write the result.

    Args:
        session (Session): Session object.
        study_locus_input_path (str): Path to the input study locus.
        study_index_path (str): Path to the study index.
        ld_index_path (str): Path to the LD index.
        clumped_study_locus_output_path (str): Path to the resulting clumped study-locus dataset.
    """
    # Load the three input datasets from parquet.
    loci = StudyLocus.from_parquet(session, study_locus_input_path)
    linkage = LDIndex.from_parquet(session, ld_index_path)
    studies = StudyIndex.from_parquet(session, study_index_path)

    # LD annotation has to happen before clumping.
    ld_annotated = loci.annotate_ld(studies, linkage)
    clumped = ld_annotated.clump()

    # Persist the clumped dataset using the session's configured write mode.
    writer = clumped.df.write.mode(session.write_mode)
    writer.parquet(clumped_study_locus_output_path)