Skip to content

ld_index

gentropy.ld_index.LDIndexStep

LD index step.

This step is resource intensive

Suggested parameters: a high-memory machine with 5 TB of boot disk and no SSDs.

Source code in src/gentropy/ld_index.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class LDIndexStep:
    """Step that builds the LD index and writes it out as parquet.

    !!! warning "This step is resource intensive"

        Suggested params: high memory machine, 5TB of boot disk, no SSDs.
    """

    def __init__(self, session: Session, min_r2: float, ld_index_out: str) -> None:
        """Build the LD index and persist it, partitioned by chromosome.

        Args:
            session (Session): Session object; supplies the Spark context, the write mode and the logger.
            min_r2 (float): Minimum r2 threshold, forwarded to `GnomADLDMatrix.as_ld_index`.
            ld_index_out (str): Path the LD index parquet dataset is written to.
        """
        # `hl` is initialised on the session's Spark context; its log is sent to /dev/null.
        hl.init(sc=session.spark.sparkContext, log="/dev/null")

        ld_index = GnomADLDMatrix().as_ld_index(min_r2)
        writer = ld_index.df.write.partitionBy("chromosome").mode(session.write_mode)
        writer.parquet(ld_index_out)

        # Record the output path once the write call has returned.
        session.logger.info(ld_index_out)

__init__(session: Session, min_r2: float, ld_index_out: str) -> None

Run step.

Parameters:

Name Type Description Default
session Session

Session object.

required
min_r2 float

Minimum r2 threshold to apply when considering variants within a window.

required
ld_index_out str

Output LD index path.

required
Source code in src/gentropy/ld_index.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, session: Session, min_r2: float, ld_index_out: str) -> None:
    """Build the LD index and persist it, partitioned by chromosome.

    Args:
        session (Session): Session object; supplies the Spark context, the write mode and the logger.
        min_r2 (float): Minimum r2 threshold, forwarded to `GnomADLDMatrix.as_ld_index`.
        ld_index_out (str): Path the LD index parquet dataset is written to.
    """
    # `hl` is initialised on the session's Spark context; its log is sent to /dev/null.
    hl.init(sc=session.spark.sparkContext, log="/dev/null")

    ld_index = GnomADLDMatrix().as_ld_index(min_r2)
    writer = ld_index.df.write.partitionBy("chromosome").mode(session.write_mode)
    writer.parquet(ld_index_out)

    # Record the output path once the write call has returned.
    session.logger.info(ld_index_out)