Skip to content

colocalisation

gentropy.colocalisation.ColocalisationStep

Colocalisation step.

This workflow runs colocalisation analyses that assess the degree to which independent signals of the association share the same causal variant in a region of the genome, typically limited by linkage disequilibrium (LD).

Source code in src/gentropy/colocalisation.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
class ColocalisationStep:
    """Colocalisation step.

    This workflow runs colocalisation analyses that assess the degree to which independent signals of the association share the same causal variant in a region of the genome, typically limited by linkage disequilibrium (LD).
    """

    # Lookup of colocalisation methods keyed by lower-cased METHOD_NAME.
    # Evaluated once, when the class body runs, so only direct subclasses of
    # ColocalisationMethodInterface that exist at that point are registered.
    __coloc_methods__ = {
        method.METHOD_NAME.lower(): method
        for method in ColocalisationMethodInterface.__subclasses__()
    }

    def __init__(
        self,
        session: Session,
        credible_set_path: str,
        coloc_path: str,
        colocalisation_method: str,
        restrict_right_studies: list[str] | None = None,
        gwas_v_qtl_overlap_only: bool = False,
        colocalisation_method_params: dict[str, Any] | None = None,
    ) -> None:
        """Run Colocalisation step.

        This step runs either a single registered colocalisation method (e.g. ecaviar or coloc) or, with 'coloc_pip_ecaviar', both ColocPIP and eCAVIAR on the same overlaps and merges their results. The default behaviour is all gwas vs all gwas plus all gwas vs all molecular-QTLs.

        Args:
            session (Session): Session object.
            credible_set_path (str): Input credible sets path.
            coloc_path (str): Output path.
            colocalisation_method (str): Colocalisation method (case-insensitive). Use 'coloc_pip_ecaviar' to run both ColocPIP and eCAVIAR and merge results. Any other value must name a registered method, otherwise a ValueError is raised.
            restrict_right_studies (list[str] | None): List of study IDs to restrict the right side of the colocalisation overlaps to, e.g. all gwas vs a single studyId. Defaults to None.
            gwas_v_qtl_overlap_only (bool): If True, restricts the right side of colocalisation overlaps to only molecular-QTL studies, e.g. all gwas vs all molQTLs. Defaults to False.
            colocalisation_method_params (dict[str, Any] | None): Keyword arguments passed to the colocalise method of Colocalisation class. In the 'coloc_pip_ecaviar' mode they are passed to ColocPIP only. Defaults to None.

        Keyword Args:
            priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. For coloc method only.
            priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. For coloc method only.
            priorc12 (float): Prior on variant being causal for both traits. Defaults to 1e-5. For coloc method only.
            overlap_size_cutoff (int): Minimum number of overlapping variants before filtering. Defaults to 0.
            posterior_cutoff (float): Minimum overlapping Posterior probability cutoff for small overlaps. Defaults to 0.0.
        """
        colocalisation_method = colocalisation_method.lower()

        # Extract
        # NOTE(review): the option was previously spelled `recusiveFileLookup`;
        # Spark's reader option is `recursiveFileLookup`. Presumably from_parquet
        # forwards extra keyword arguments to the Spark reader - confirm.
        credible_set = StudyLocus.from_parquet(
            session, credible_set_path, recursiveFileLookup=True
        )

        if colocalisation_method == "coloc_pip_ecaviar":
            # Transform - find overlaps once and feed them to both methods
            overlaps = credible_set.find_overlaps(
                restrict_right_studies=restrict_right_studies,
                gwas_v_qtl_overlap_only=gwas_v_qtl_overlap_only,
            )

            # Run ColocPIP; method params are bound only when the dict is not empty
            coloc_pip = ColocPIP.colocalise
            if colocalisation_method_params:
                coloc_pip = partial(coloc_pip, **colocalisation_method_params)
            coloc_pip_results = coloc_pip(overlaps)

            # Run eCAVIAR (always with its own defaults)
            ecaviar_results = ECaviar.colocalise(overlaps)

            colocalisation_results = self._merge_coloc_pip_ecaviar(
                coloc_pip_results, ecaviar_results
            )
        else:
            colocalisation_class = self._get_colocalisation_class(colocalisation_method)

            # For the coloc method only SuSiE / SuSiE-inf credible sets are kept
            if colocalisation_method == Coloc.METHOD_NAME.lower():
                credible_set = credible_set.filter(
                    f.col("finemappingMethod").isin(
                        FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value
                    )
                )

            # Transform
            overlaps = credible_set.find_overlaps(
                restrict_right_studies=restrict_right_studies,
                gwas_v_qtl_overlap_only=gwas_v_qtl_overlap_only,
            )

            # Make a partial caller to ensure that colocalisation_method_params are added to the call only when dict is not empty
            coloc = colocalisation_class.colocalise
            if colocalisation_method_params:
                coloc = partial(coloc, **colocalisation_method_params)
            colocalisation_results = coloc(overlaps)

        # Load
        colocalisation_results.df.coalesce(session.output_partitions).write.mode(
            session.write_mode
        ).parquet(coloc_path)

    @staticmethod
    def _merge_coloc_pip_ecaviar(
        coloc_pip_results: Colocalisation, ecaviar_results: Colocalisation
    ) -> Colocalisation:
        """Merge ColocPIP and eCAVIAR results into one Colocalisation dataset.

        The two result sets are inner-joined on the study locus pair, chromosome and right study type, so only pairs reported by both methods are kept.

        Args:
            coloc_pip_results (Colocalisation): Results of ColocPIP; source of h3, h4 and betaRatioSignAverage.
            ecaviar_results (Colocalisation): Results of eCAVIAR; source of clpp.

        Returns:
            Colocalisation: Merged results labelled with the COLOC_PIP_ECAVIAR method name.
        """
        join_keys = [
            "leftStudyLocusId",
            "rightStudyLocusId",
            "chromosome",
            "rightStudyType",
        ]

        # eCAVIAR metrics are renamed up front so they cannot clash with the
        # identically named ColocPIP columns after the join.
        ecaviar_metrics = ecaviar_results.df.alias("ecav").select(
            *join_keys,
            f.col("clpp").alias("clpp_ecaviar"),
            f.col("numberColocalisingVariants").alias(
                "numberColocalisingVariants_ecaviar"
            ),
        )

        return Colocalisation(
            _df=coloc_pip_results.df.alias("pip")
            .join(ecaviar_metrics, on=join_keys, how="inner")
            .select(
                f.col("pip.leftStudyLocusId"),
                f.col("pip.rightStudyLocusId"),
                f.col("pip.rightStudyType"),
                f.col("pip.chromosome"),
                # Use a combined method name
                f.lit("COLOC_PIP_ECAVIAR").alias("colocalisationMethod"),
                # Use the max number of colocalising variants from both methods
                f.greatest(
                    f.col("pip.numberColocalisingVariants"),
                    f.col("numberColocalisingVariants_ecaviar"),
                ).alias("numberColocalisingVariants"),
                # Keep h3 and h4 from ColocPIP
                f.col("pip.h3"),
                f.col("pip.h4"),
                # Add clpp from eCAVIAR
                f.col("clpp_ecaviar").alias("clpp"),
                # Keep beta ratio from ColocPIP
                f.col("pip.betaRatioSignAverage"),
            ),
            _schema=Colocalisation.get_schema(),
        )

    @classmethod
    def _get_colocalisation_class(
        cls, method: str
    ) -> type[ColocalisationMethodInterface]:
        """Get colocalisation class.

        The lookup is case-insensitive: the method name is lower-cased before it is matched against the registered methods.

        Args:
            method (str): Colocalisation method.

        Returns:
            type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface.

        Raises:
            ValueError: if method not available.

        Examples:
            >>> ColocalisationStep._get_colocalisation_class("ECaviar")
            <class 'gentropy.method.colocalisation.ECaviar'>
        """
        method = method.lower()
        if method not in cls.__coloc_methods__:
            raise ValueError(f"Colocalisation method {method} not available.")
        return cls.__coloc_methods__[method]

__init__(session: Session, credible_set_path: str, coloc_path: str, colocalisation_method: str, restrict_right_studies: list[str] | None = None, gwas_v_qtl_overlap_only: bool = False, colocalisation_method_params: dict[str, Any] | None = None) -> None

Run Colocalisation step.

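This step runs either a single colocalisation method (e.g. ecaviar or coloc) or, with 'coloc_pip_ecaviar', both ColocPIP and eCAVIAR on the same overlaps, merging their results. The default behaviour is all gwas vs all gwas plus all gwas vs all molecular-QTLs.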

Parameters:

Name Type Description Default
session Session

Session object.

required
credible_set_path str

Input credible sets path.

required
coloc_path str

Output path.

required
colocalisation_method str

Colocalisation method. Use 'coloc_pip_ecaviar' to run both ColocPIP and eCAVIAR and merge results.

required
restrict_right_studies list[str] | None

List of study IDs to restrict the right side of the colocalisation overlaps to, e.g. all gwas vs a single studyId. Defaults to None.

None
gwas_v_qtl_overlap_only bool

If True, restricts the right side of colocalisation overlaps to only molecular-QTL studies, e.g. all gwas vs all molQTLs. Defaults to False.

False
colocalisation_method_params dict[str, Any] | None

Keyword arguments passed to the colocalise method of Colocalisation class. Defaults to None

None

Other Parameters:

Name Type Description
priorc1 float

Prior on variant being causal for trait 1. Defaults to 1e-4. For coloc method only.

priorc2 float

Prior on variant being causal for trait 2. Defaults to 1e-4. For coloc method only.

priorc12 float

Prior on variant being causal for both traits. Defaults to 1e-5. For coloc method only.

overlap_size_cutoff int

Minimum number of overlapping variants before filtering. Defaults to 0.

posterior_cutoff float

Minimum overlapping Posterior probability cutoff for small overlaps. Defaults to 0.0.

Source code in src/gentropy/colocalisation.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def __init__(
    self,
    session: Session,
    credible_set_path: str,
    coloc_path: str,
    colocalisation_method: str,
    restrict_right_studies: list[str] | None = None,
    gwas_v_qtl_overlap_only: bool = False,
    colocalisation_method_params: dict[str, Any] | None = None,
) -> None:
    """Run Colocalisation step.

    This step runs either a single registered colocalisation method (e.g. ecaviar or coloc) or, with 'coloc_pip_ecaviar', both ColocPIP and eCAVIAR on the same overlaps and merges their results. The default behaviour is all gwas vs all gwas plus all gwas vs all molecular-QTLs.

    Args:
        session (Session): Session object.
        credible_set_path (str): Input credible sets path.
        coloc_path (str): Output path.
        colocalisation_method (str): Colocalisation method (case-insensitive). Use 'coloc_pip_ecaviar' to run both ColocPIP and eCAVIAR and merge results. Any other value must name a registered method, otherwise a ValueError is raised.
        restrict_right_studies (list[str] | None): List of study IDs to restrict the right side of the colocalisation overlaps to, e.g. all gwas vs a single studyId. Defaults to None.
        gwas_v_qtl_overlap_only (bool): If True, restricts the right side of colocalisation overlaps to only molecular-QTL studies, e.g. all gwas vs all molQTLs. Defaults to False.
        colocalisation_method_params (dict[str, Any] | None): Keyword arguments passed to the colocalise method of Colocalisation class. In the 'coloc_pip_ecaviar' mode they are passed to ColocPIP only. Defaults to None.

    Keyword Args:
        priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. For coloc method only.
        priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. For coloc method only.
        priorc12 (float): Prior on variant being causal for both traits. Defaults to 1e-5. For coloc method only.
        overlap_size_cutoff (int): Minimum number of overlapping variants before filtering. Defaults to 0.
        posterior_cutoff (float): Minimum overlapping Posterior probability cutoff for small overlaps. Defaults to 0.0.
    """
    colocalisation_method = colocalisation_method.lower()

    # Extract
    # NOTE(review): the option was previously spelled `recusiveFileLookup`;
    # Spark's reader option is `recursiveFileLookup`. Presumably from_parquet
    # forwards extra keyword arguments to the Spark reader - confirm.
    credible_set = StudyLocus.from_parquet(
        session, credible_set_path, recursiveFileLookup=True
    )

    if colocalisation_method == "coloc_pip_ecaviar":
        # Transform - find overlaps once and feed them to both methods
        overlaps = credible_set.find_overlaps(
            restrict_right_studies=restrict_right_studies,
            gwas_v_qtl_overlap_only=gwas_v_qtl_overlap_only,
        )

        # Run ColocPIP; method params are bound only when the dict is not empty
        coloc_pip = ColocPIP.colocalise
        if colocalisation_method_params:
            coloc_pip = partial(coloc_pip, **colocalisation_method_params)
        coloc_pip_results = coloc_pip(overlaps)

        # Run eCAVIAR (always with its own defaults)
        ecaviar_results = ECaviar.colocalise(overlaps)

        # Merge results: join on key columns and combine metrics.
        # The inner join keeps only pairs reported by both methods.
        join_keys = [
            "leftStudyLocusId",
            "rightStudyLocusId",
            "chromosome",
            "rightStudyType",
        ]

        colocalisation_results = Colocalisation(
            _df=coloc_pip_results.df.alias("pip")
            .join(
                # eCAVIAR metrics are renamed up front so they cannot clash
                # with the identically named ColocPIP columns after the join.
                ecaviar_results.df.alias("ecav").select(
                    *join_keys,
                    f.col("clpp").alias("clpp_ecaviar"),
                    f.col("numberColocalisingVariants").alias(
                        "numberColocalisingVariants_ecaviar"
                    ),
                ),
                on=join_keys,
                how="inner",
            )
            .select(
                f.col("pip.leftStudyLocusId"),
                f.col("pip.rightStudyLocusId"),
                f.col("pip.rightStudyType"),
                f.col("pip.chromosome"),
                # Use a combined method name
                f.lit("COLOC_PIP_ECAVIAR").alias("colocalisationMethod"),
                # Use the max number of colocalising variants from both methods
                f.greatest(
                    f.col("pip.numberColocalisingVariants"),
                    f.col("numberColocalisingVariants_ecaviar"),
                ).alias("numberColocalisingVariants"),
                # Keep h3 and h4 from ColocPIP
                f.col("pip.h3"),
                f.col("pip.h4"),
                # Add clpp from eCAVIAR
                f.col("clpp_ecaviar").alias("clpp"),
                # Keep beta ratio from ColocPIP
                f.col("pip.betaRatioSignAverage"),
            ),
            _schema=Colocalisation.get_schema(),
        )
    else:
        colocalisation_class = self._get_colocalisation_class(colocalisation_method)

        # For the coloc method only SuSiE / SuSiE-inf credible sets are kept
        if colocalisation_method == Coloc.METHOD_NAME.lower():
            credible_set = credible_set.filter(
                f.col("finemappingMethod").isin(
                    FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value
                )
            )

        # Transform
        overlaps = credible_set.find_overlaps(
            restrict_right_studies=restrict_right_studies,
            gwas_v_qtl_overlap_only=gwas_v_qtl_overlap_only,
        )

        # Make a partial caller to ensure that colocalisation_method_params are added to the call only when dict is not empty
        coloc = colocalisation_class.colocalise
        if colocalisation_method_params:
            coloc = partial(coloc, **colocalisation_method_params)
        colocalisation_results = coloc(overlaps)

    # Load
    colocalisation_results.df.coalesce(session.output_partitions).write.mode(
        session.write_mode
    ).parquet(coloc_path)