Skip to content

summary_statistics_qc

gentropy.sumstat_qc_step.SummaryStatisticsQCStep

Step to run quality control (QC) on GWAS summary statistics.

Source code in src/gentropy/sumstat_qc_step.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class SummaryStatisticsQCStep:
    """Step computing quality control metrics for GWAS summary statistics."""

    def __init__(
        self,
        session: Session,
        gwas_path: str,
        output_path: str,
        pval_threshold: float = 1e-8,
    ) -> None:
        """Load the summary statistics, derive the QC metrics, and persist them.

        Args:
            session (Session): Session used to read the input; its `write_mode` governs how the output is written.
            gwas_path (str): Path to the GWAS summary statistics, read as parquet.
            output_path (str): Output path for the QC results, written as parquet.
            pval_threshold (float): P-value threshold forwarded to the QC metric calculation. Defaults to 1e-8.
        """
        summary_stats = SummaryStatistics.from_parquet(session, path=gwas_path)
        qc_metrics = SummaryStatisticsQC.get_quality_control_metrics(
            gwas=summary_stats,
            pval_threshold=pval_threshold,
        )
        # All of the step's work happens at construction time: the metrics are
        # written out immediately, using the write mode configured on the session.
        qc_metrics.write.mode(session.write_mode).parquet(output_path)

__init__(session: Session, gwas_path: str, output_path: str, pval_threshold: float = 1e-08) -> None

Calculate quality control metrics for the provided GWAS study.

Parameters:

Name Type Description Default
session Session

Spark session

required
gwas_path str

Path to the GWAS summary statistics.

required
output_path str

Output path for the QC results.

required
pval_threshold float

P-value threshold for the QC. Default is 1e-8.

1e-08
Source code in src/gentropy/sumstat_qc_step.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(
    self,
    session: Session,
    gwas_path: str,
    output_path: str,
    pval_threshold: float = 1e-8,
) -> None:
    """Load the summary statistics, derive the QC metrics, and persist them.

    Args:
        session (Session): Session used to read the input; its `write_mode` governs how the output is written.
        gwas_path (str): Path to the GWAS summary statistics, read as parquet.
        output_path (str): Output path for the QC results, written as parquet.
        pval_threshold (float): P-value threshold forwarded to the QC metric calculation. Defaults to 1e-8.
    """
    summary_stats = SummaryStatistics.from_parquet(session, path=gwas_path)
    qc_metrics = SummaryStatisticsQC.get_quality_control_metrics(
        gwas=summary_stats,
        pval_threshold=pval_threshold,
    )
    # All of the step's work happens at construction time: the metrics are
    # written out immediately, using the write mode configured on the session.
    qc_metrics.write.mode(session.write_mode).parquet(output_path)