Skip to content

Study Locus Overlap

gentropy.dataset.study_locus_overlap.StudyLocusOverlap dataclass

Bases: Dataset

Study-Locus overlap.

This dataset captures pairs of overlapping StudyLocus associations, that is, associations whose credible sets share at least one tagging variant.

Note

This is a helpful dataset for other downstream analyses, such as colocalisation. This dataset will contain the overlapping signals between studyLocus associations once they have been clumped and fine-mapped.

Source code in src/gentropy/dataset/study_locus_overlap.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
@dataclass
class StudyLocusOverlap(Dataset):
    """Study-Locus overlap.

    This dataset captures pairs of overlapping `StudyLocus`: that is, associations whose credible sets share at least one tagging variant.

    !!! note

        This is a helpful dataset for other downstream analyses, such as colocalisation. This dataset will contain the overlapping signals between studyLocus associations once they have been clumped and fine-mapped.
    """

    @classmethod
    def get_schema(cls: type[StudyLocusOverlap]) -> StructType:
        """Provides the schema for the StudyLocusOverlap dataset.

        The schema is parsed from the `study_locus_overlap.json` schema definition.

        Returns:
            StructType: Schema for the StudyLocusOverlap dataset
        """
        return parse_spark_schema("study_locus_overlap.json")

    @classmethod
    def from_associations(
        cls: type[StudyLocusOverlap], study_locus: StudyLocus, study_index: StudyIndex
    ) -> StudyLocusOverlap:
        """Find the overlapping signals in a particular set of associations (StudyLocus dataset).

        This is a thin wrapper that delegates to `StudyLocus.find_overlaps`.

        Args:
            study_locus (StudyLocus): Study-locus associations to find the overlapping signals
            study_index (StudyIndex): Study index passed on to the overlap search

        Returns:
            StudyLocusOverlap: Study-locus overlap dataset
        """
        return study_locus.find_overlaps(study_index)

    def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap:
        """Convert the dataset to a square matrix.

        Each (left, right) pair is complemented with its mirrored (right, left)
        pair and exact duplicate rows are dropped.

        Returns:
            StudyLocusOverlap: Square matrix of the dataset
        """
        # Only the two identifier columns trade places in the mirrored rows.
        swapped_names = {
            "leftStudyLocusId": "rightStudyLocusId",
            "rightStudyLocusId": "leftStudyLocusId",
        }
        # The projection is derived from the columns actually present, so both sides
        # of the union expose the same column set. `unionByName` raises when the
        # column sets differ, which happened for frames that also carry
        # `chromosome` / `statistics`.
        # NOTE(review): every other column is copied unchanged, so the left_/right_
        # prefixed fields nested in `statistics` are NOT swapped in the mirrored rows;
        # confirm whether downstream consumers need them swapped.
        mirrored_df = self.df.selectExpr(
            *(
                f"`{column}` as `{swapped_names.get(column, column)}`"
                for column in self.df.columns
            )
        )
        return StudyLocusOverlap(
            _df=self.df.unionByName(mirrored_df).distinct(),
            _schema=self.get_schema(),
        )

from_associations(study_locus: StudyLocus, study_index: StudyIndex) -> StudyLocusOverlap classmethod

Find the overlapping signals in a particular set of associations (StudyLocus dataset).

Parameters:

Name Type Description Default
study_locus StudyLocus

Study-locus associations to find the overlapping signals

required
study_index StudyIndex

Study index passed to the overlap search

required

Returns:

Name Type Description
StudyLocusOverlap StudyLocusOverlap

Study-locus overlap dataset

Source code in src/gentropy/dataset/study_locus_overlap.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@classmethod
def from_associations(
    cls: type[StudyLocusOverlap], study_locus: StudyLocus, study_index: StudyIndex
) -> StudyLocusOverlap:
    """Build the overlap dataset from a set of associations (StudyLocus dataset).

    The work is delegated to `study_locus.find_overlaps`, which receives the study index.

    Args:
        study_locus (StudyLocus): Study-locus associations in which to look for overlapping signals
        study_index (StudyIndex): Study index handed to the overlap search

    Returns:
        StudyLocusOverlap: Study-locus overlap dataset
    """
    overlaps = study_locus.find_overlaps(study_index)
    return overlaps

get_schema() -> StructType classmethod

Provides the schema for the StudyLocusOverlap dataset.

Returns:

Name Type Description
StructType StructType

Schema for the StudyLocusOverlap dataset

Source code in src/gentropy/dataset/study_locus_overlap.py
28
29
30
31
32
33
34
35
@classmethod
def get_schema(cls: type[StudyLocusOverlap]) -> StructType:
    """Return the Spark schema of the StudyLocusOverlap dataset.

    The schema is parsed from the `study_locus_overlap.json` schema definition.

    Returns:
        StructType: Schema for the StudyLocusOverlap dataset
    """
    overlap_schema = parse_spark_schema("study_locus_overlap.json")
    return overlap_schema

Schema

root
 |-- leftStudyLocusId: long (nullable = false)
 |-- rightStudyLocusId: long (nullable = false)
 |-- chromosome: string (nullable = true)
 |-- tagVariantId: string (nullable = false)
 |-- statistics: struct (nullable = true)
 |    |-- left_pValueMantissa: float (nullable = true)
 |    |-- left_pValueExponent: integer (nullable = true)
 |    |-- right_pValueMantissa: float (nullable = true)
 |    |-- right_pValueExponent: integer (nullable = true)
 |    |-- left_beta: double (nullable = true)
 |    |-- right_beta: double (nullable = true)
 |    |-- left_logBF: double (nullable = true)
 |    |-- right_logBF: double (nullable = true)
 |    |-- left_posteriorProbability: double (nullable = true)
 |    |-- right_posteriorProbability: double (nullable = true)