Skip to content

Study Locus Overlap

gentropy.dataset.study_locus_overlap.StudyLocusOverlap dataclass

Bases: Dataset

Study-Locus overlap.

This dataset captures pairs of overlapping StudyLocus, that is, associations whose credible sets share at least one tagging variant.

Note

This is a helpful dataset for other downstream analyses, such as colocalisation. This dataset will contain the overlapping signals between studyLocus associations once they have been clumped and fine-mapped.

Source code in src/gentropy/dataset/study_locus_overlap.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@dataclass
class StudyLocusOverlap(Dataset):
    """Dataset of overlapping study-locus pairs.

    A pair of `StudyLocus` associations is considered overlapping when their credible sets have at least one tagging variant in common.

    !!! note

        Downstream analyses such as colocalisation build on this dataset. It is meant to hold the overlapping signals between studyLocus associations after they have been clumped and fine-mapped.
    """

    @classmethod
    def get_schema(cls: type[StudyLocusOverlap]) -> StructType:
        """Load the Spark schema of the StudyLocusOverlap dataset.

        Returns:
            StructType: Schema for the StudyLocusOverlap dataset
        """
        overlap_schema = parse_spark_schema("study_locus_overlap.json")
        return overlap_schema

    @classmethod
    def from_associations(
        cls: type[StudyLocusOverlap], study_locus: StudyLocus
    ) -> StudyLocusOverlap:
        """Build the overlap dataset from a set of associations (StudyLocus dataset).

        The work is delegated to `StudyLocus.find_overlaps`.

        Args:
            study_locus (StudyLocus): Study-locus associations to find the overlapping signals

        Returns:
            StudyLocusOverlap: Study-locus overlap dataset
        """
        overlaps = study_locus.find_overlaps()
        return overlaps

    def calculate_beta_ratio(self: StudyLocusOverlap) -> DataFrame:
        """Compute the average sign of the beta ratio for each pair of overlapping loci.

        Returns:
            DataFrame: A dataframe containing left and right loci IDs, chromosome and the average sign of the beta ratio
        """
        left_beta = f.col("left_beta")
        right_beta = f.col("right_beta")

        # A row is usable only when both betas are present and non-zero:
        has_usable_betas = (
            left_beta.isNotNull()
            & right_beta.isNotNull()
            & (left_beta != 0)
            & (right_beta != 0)
        )

        # Promote the fields of the statistics struct to top-level columns:
        flattened = self.df.select("*", "statistics.*").drop("statistics")

        # Sign of the left/right beta ratio for every remaining row:
        signed = flattened.filter(has_usable_betas).withColumn(
            "betaRatioSign", f.signum(left_beta / right_beta)
        )

        # Average the per-row signs within each pair of loci:
        return signed.groupBy(
            "leftStudyLocusId", "rightStudyLocusId", "chromosome"
        ).agg(f.avg("betaRatioSign").alias("betaRatioSignAverage"))

    def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap:
        """Add the mirrored pairs to the dataset so that it forms a square matrix.

        Returns:
            StudyLocusOverlap: Square matrix of the dataset
        """
        # Same rows with the two locus identifiers swapped; every other column
        # (including rightStudyType and statistics) is carried over as-is.
        mirrored = self.df.selectExpr(
            "leftStudyLocusId as rightStudyLocusId",
            "rightStudyLocusId as leftStudyLocusId",
            "rightStudyType",
            "tagVariantId",
            "chromosome",
            "statistics",
        )
        # Stack original and mirrored rows, dropping exact duplicates:
        symmetric = self.df.unionByName(mirrored).distinct()
        return StudyLocusOverlap(_df=symmetric, _schema=self.get_schema())

calculate_beta_ratio() -> DataFrame

Calculate the beta ratio for the overlapping signals.

Returns:

Name Type Description
DataFrame DataFrame

A dataframe containing left and right loci IDs, chromosome and the average sign of the beta ratio

Source code in src/gentropy/dataset/study_locus_overlap.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def calculate_beta_ratio(self: StudyLocusOverlap) -> DataFrame:
    """Compute the average sign of the beta ratio for each pair of overlapping loci.

    Returns:
        DataFrame: A dataframe containing left and right loci IDs, chromosome and the average sign of the beta ratio
    """
    left_beta = f.col("left_beta")
    right_beta = f.col("right_beta")

    # A row is usable only when both betas are present and non-zero:
    has_usable_betas = (
        left_beta.isNotNull()
        & right_beta.isNotNull()
        & (left_beta != 0)
        & (right_beta != 0)
    )

    # Promote the fields of the statistics struct to top-level columns:
    flattened = self.df.select("*", "statistics.*").drop("statistics")

    # Sign of the left/right beta ratio for every remaining row:
    signed = flattened.filter(has_usable_betas).withColumn(
        "betaRatioSign", f.signum(left_beta / right_beta)
    )

    # Average the per-row signs within each pair of loci:
    return signed.groupBy(
        "leftStudyLocusId", "rightStudyLocusId", "chromosome"
    ).agg(f.avg("betaRatioSign").alias("betaRatioSignAverage"))

from_associations(study_locus: StudyLocus) -> StudyLocusOverlap classmethod

Find the overlapping signals in a particular set of associations (StudyLocus dataset).

Parameters:

Name Type Description Default
study_locus StudyLocus

Study-locus associations to find the overlapping signals

required

Returns:

Name Type Description
StudyLocusOverlap StudyLocusOverlap

Study-locus overlap dataset

Source code in src/gentropy/dataset/study_locus_overlap.py
40
41
42
43
44
45
46
47
48
49
50
51
52
@classmethod
def from_associations(
    cls: type[StudyLocusOverlap], study_locus: StudyLocus
) -> StudyLocusOverlap:
    """Build the overlap dataset from a set of associations (StudyLocus dataset).

    The work is delegated to `StudyLocus.find_overlaps`.

    Args:
        study_locus (StudyLocus): Study-locus associations to find the overlapping signals

    Returns:
        StudyLocusOverlap: Study-locus overlap dataset
    """
    overlaps = study_locus.find_overlaps()
    return overlaps

get_schema() -> StructType classmethod

Provides the schema for the StudyLocusOverlap dataset.

Returns:

Name Type Description
StructType StructType

Schema for the StudyLocusOverlap dataset

Source code in src/gentropy/dataset/study_locus_overlap.py
31
32
33
34
35
36
37
38
@classmethod
def get_schema(cls: type[StudyLocusOverlap]) -> StructType:
    """Load the Spark schema of the StudyLocusOverlap dataset.

    Returns:
        StructType: Schema for the StudyLocusOverlap dataset
    """
    overlap_schema = parse_spark_schema("study_locus_overlap.json")
    return overlap_schema

Schema

root
 |-- leftStudyLocusId: string (nullable = false)
 |-- rightStudyLocusId: string (nullable = false)
 |-- rightStudyType: string (nullable = false)
 |-- chromosome: string (nullable = true)
 |-- tagVariantId: string (nullable = false)
 |-- statistics: struct (nullable = true)
 |    |-- left_pValueMantissa: float (nullable = true)
 |    |-- left_pValueExponent: integer (nullable = true)
 |    |-- right_pValueMantissa: float (nullable = true)
 |    |-- right_pValueExponent: integer (nullable = true)
 |    |-- left_beta: double (nullable = true)
 |    |-- right_beta: double (nullable = true)
 |    |-- left_logBF: double (nullable = true)
 |    |-- right_logBF: double (nullable = true)
 |    |-- left_posteriorProbability: double (nullable = true)
 |    |-- right_posteriorProbability: double (nullable = true)