Skip to content

PICS

PICS Overview:

PICS is a fine-mapping method designed to identify the most likely causal SNPs associated with a trait or disease within a genomic region. It leverages both haplotype information and the observed association patterns from genome-wide association studies (GWAS).

Please refer to the original publication for in-depth details: PICS Publication.

We use PICS for both GWAS clumping results and GWAS curated studies.

gentropy.method.pics.PICS

Probabilistic Identification of Causal SNPs (PICS), an algorithm estimating the probability that an individual variant is causal considering the haplotype structure and observed pattern of association at the genetic locus.

Source code in src/gentropy/method/pics.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
class PICS:
    """Probabilistic Identification of Causal SNPs (PICS), an algorithm estimating the probability that an individual variant is causal considering the haplotype structure and observed pattern of association at the genetic locus."""

    # The fields for the picsed locus + ldSet tagVariantId is renamed to variantId:
    PICSED_LOCUS_SCHEMA = t.ArrayType(
        t.StructType(
            [
                t.StructField("variantId", t.StringType(), True),
                t.StructField("r2Overall", t.DoubleType(), True),
                t.StructField("posteriorProbability", t.DoubleType(), True),
                t.StructField("standardError", t.DoubleType(), True),
            ]
        )
    )

    @staticmethod
    def _pics_relative_posterior_probability(
        neglog_p: float, pics_snp_mu: float, pics_snp_std: float
    ) -> float:
        """Compute the PICS posterior probability for a given SNP.

        !!! info "This probability needs to be scaled to take into account the probabilities of the other variants in the locus."

        Args:
            neglog_p (float): Negative log p-value of the lead variant
            pics_snp_mu (float): Mean P value of the association between a SNP and a trait
            pics_snp_std (float): Standard deviation for the P value of the association between a SNP and a trait

        Returns:
            float: Posterior probability of the association between a SNP and a trait

        Examples:
            >>> rel_prob = PICS._pics_relative_posterior_probability(neglog_p=10.0, pics_snp_mu=1.0, pics_snp_std=10.0)
            >>> round(rel_prob, 3)
            0.368
        """
        return float(norm(pics_snp_mu, pics_snp_std).sf(neglog_p) * 2)

    @staticmethod
    def _pics_standard_deviation(neglog_p: float, r2: float, k: float) -> float | None:
        """Compute the PICS standard deviation.

        This distribution is obtained after a series of permutation tests described in the PICS method, and it is only
        valid when the SNP is highly linked with the lead (r2 > 0.5).

        Args:
            neglog_p (float): Negative log p-value of the lead variant
            r2 (float): LD score between a given SNP and the lead variant
            k (float): Empiric constant that can be adjusted to fit the curve, 6.4 recommended.

        Returns:
            float | None: Standard deviation for the P value of the association between a SNP and a trait

        Examples:
            >>> PICS._pics_standard_deviation(neglog_p=1.0, r2=1.0, k=6.4)
            0.0
            >>> round(PICS._pics_standard_deviation(neglog_p=10.0, r2=0.5, k=6.4), 3)
            1.493
            >>> print(PICS._pics_standard_deviation(neglog_p=1.0, r2=0.0, k=6.4))
            None
        """
        return (
            abs(((1 - (r2**0.5) ** k) ** 0.5) * (neglog_p**0.5) / 2)
            if r2 >= 0.5
            else None
        )

    @staticmethod
    def _pics_mu(neglog_p: float, r2: float) -> float | None:
        """Compute the PICS mu that estimates the probability of association between a given SNP and the trait.

        This distribution is obtained after a series of permutation tests described in the PICS method, and it is only
        valid when the SNP is highly linked with the lead (r2 > 0.5).

        Args:
            neglog_p (float): Negative log p-value of the lead variant
            r2 (float): LD score between a given SNP and the lead variant

        Returns:
            float | None: Mean P value of the association between a SNP and a trait

        Examples:
            >>> PICS._pics_mu(neglog_p=1.0, r2=1.0)
            1.0
            >>> PICS._pics_mu(neglog_p=10.0, r2=0.5)
            5.0
            >>> print(PICS._pics_mu(neglog_p=10.0, r2=0.3))
            None
        """
        return neglog_p * r2 if r2 >= 0.5 else None

    @staticmethod
    def _finemap(
        ld_set: list[Row], lead_neglog_p: float, k: float
    ) -> list[dict[str, Any]] | None:
        """Calculates the probability of a variant being causal in a study-locus context by applying the PICS method.

        It is intended to be applied as an UDF in `PICS.finemap`, where each row is a StudyLocus association.
        The function iterates over every SNP in the `ldSet` array, and it returns an updated locus with
        its association signal and causality probability as of PICS.

        Args:
            ld_set (list[Row]): list of tagging variants after expanding the locus
            lead_neglog_p (float): P value of the association signal between the lead variant and the study in the form of -log10.
            k (float): Empiric constant that can be adjusted to fit the curve, 6.4 recommended.

        Returns:
            list[dict[str, Any]] | None: List of tagging variants with an estimation of the association signal and their posterior probability as of PICS.

        Examples:
            >>> from pyspark.sql import Row
            >>> ld_set = [
            ...     Row(variantId="var1", r2Overall=0.8),
            ...     Row(variantId="var2", r2Overall=1),
            ... ]
            >>> PICS._finemap(ld_set, lead_neglog_p=10.0, k=6.4)
            [{'variantId': 'var1', 'r2Overall': 0.8, 'standardError': 0.07420896512708416, 'posteriorProbability': 0.07116959886882368}, {'variantId': 'var2', 'r2Overall': 1, 'standardError': 0.9977000638225533, 'posteriorProbability': 0.9288304011311763}]
            >>> empty_ld_set = []
            >>> PICS._finemap(empty_ld_set, lead_neglog_p=10.0, k=6.4)
            []
            >>> ld_set_with_no_r2 = [
            ...     Row(variantId="var1", r2Overall=None),
            ...     Row(variantId="var2", r2Overall=None),
            ... ]
            >>> PICS._finemap(ld_set_with_no_r2, lead_neglog_p=10.0, k=6.4)
            []
        """
        if ld_set is None:
            return None
        elif not ld_set:
            return []
        tmp_credible_set = []
        new_credible_set = []
        # First iteration: calculation of mu, standard deviation, and the relative posterior probability
        for tag_struct in ld_set:
            tag_dict = (
                tag_struct.asDict()
            )  # tag_struct is of type pyspark.Row, we'll represent it as a dict
            if (
                not tag_dict["r2Overall"]
                or tag_dict["r2Overall"] < 0.5
                or not lead_neglog_p
            ):
                # If PICS cannot be calculated, we drop the variant from the credible set
                continue

            # Chaing chema:
            if "tagVariantId" in tag_dict:
                tag_dict["variantId"] = tag_dict.pop("tagVariantId")

            pics_snp_mu = PICS._pics_mu(lead_neglog_p, tag_dict["r2Overall"])
            pics_snp_std = PICS._pics_standard_deviation(
                lead_neglog_p, tag_dict["r2Overall"], k
            )
            pics_snp_std = 0.001 if pics_snp_std == 0 else pics_snp_std
            if pics_snp_mu is not None and pics_snp_std is not None:
                posterior_probability = PICS._pics_relative_posterior_probability(
                    lead_neglog_p, pics_snp_mu, pics_snp_std
                )
                tag_dict["standardError"] = 10**-pics_snp_std
                tag_dict["relativePosteriorProbability"] = posterior_probability

                tmp_credible_set.append(tag_dict)

        # Second iteration: calculation of the sum of all the posteriors in each study-locus, so that we scale them between 0-1
        total_posteriors = sum(
            tag_dict.get("relativePosteriorProbability", 0)
            for tag_dict in tmp_credible_set
        )

        # Third iteration: calculation of the final posteriorProbability
        for tag_dict in tmp_credible_set:
            if total_posteriors != 0:
                tag_dict["posteriorProbability"] = float(
                    tag_dict.get("relativePosteriorProbability", 0) / total_posteriors
                )
            tag_dict.pop("relativePosteriorProbability")
            new_credible_set.append(tag_dict)
        return new_credible_set

    @classmethod
    def finemap(
        cls: type[PICS], associations: StudyLocus, k: float = 6.4
    ) -> StudyLocus:
        """Run PICS on a study locus.

        !!! info "Study locus needs to be LD annotated"

            The study locus needs to be LD annotated before PICS can be calculated.

        Args:
            associations (StudyLocus): Study locus to finemap using PICS
            k (float): Empiric constant that can be adjusted to fit the curve, 6.4 recommended.

        Returns:
            StudyLocus: Study locus with PICS results
        """
        # Finemapping method is an optional column:
        finemapping_method_expression = (
            f.lit(FinemappingMethod.PICS.value)
            if "finemappingMethod" not in associations.df.columns
            else f.coalesce(
                f.col("finemappingMethod"), f.lit(FinemappingMethod.PICS.value)
            )
        )

        # Registering the UDF to be used in the pipeline:
        finemap_udf = f.udf(
            lambda ld_set, neglog_p: cls._finemap(ld_set, neglog_p, k),
            cls.PICSED_LOCUS_SCHEMA,
        )

        return StudyLocus(
            _df=(
                associations.df
                # Old locus column will be dropped if available
                .select(*[col for col in associations.df.columns if col != "locus"])
                # Estimate neglog_pvalue for the lead variant
                .withColumn("neglog_pvalue", associations.neglog_pvalue())
                # New locus containing the PICS results
                .withColumn(
                    "locus",
                    f.when(
                        f.col("ldSet").isNotNull(),
                        finemap_udf(f.col("ldSet"), f.col("neglog_pvalue")),
                    ),
                )
                # Updating single point statistics in the locus object for the lead variant:
                .withColumn(
                    "locus",
                    f.transform(
                        f.col("locus"),
                        lambda tag: f.when(
                            f.col("variantId") == tag["variantId"],
                            tag.withField("pValueMantissa", f.col("pValueMantissa"))
                            .withField("pValueExponent", f.col("pValueExponent"))
                            .withField("beta", f.col("beta")),
                        ).otherwise(
                            tag.withField(
                                "pValueMantissa", f.lit(None).cast(t.FloatType())
                            )
                            .withField(
                                "pValueExponent", f.lit(None).cast(t.IntegerType())
                            )
                            .withField("beta", f.lit(None).cast(t.DoubleType()))
                        ),
                    ),
                )
                # Flagging all PICS loci with OUT_OF_SAMPLE_LD flag:
                .withColumn(
                    "qualityControls",
                    StudyLocus.update_quality_flag(
                        f.col("qualityControls"),
                        f.lit(True),
                        StudyLocusQualityCheck.OUT_OF_SAMPLE_LD,
                    ),
                )
                .withColumn(
                    "finemappingMethod",
                    finemapping_method_expression,
                )
                .withColumn(
                    "studyLocusId",
                    StudyLocus.assign_study_locus_id(
                        ["studyId", "variantId", "finemappingMethod"]
                    ),
                )
                .drop("neglog_pvalue")
            ),
            _schema=StudyLocus.get_schema(),
        )

finemap(associations: StudyLocus, k: float = 6.4) -> StudyLocus classmethod

Run PICS on a study locus.

Study locus needs to be LD annotated

The study locus needs to be LD annotated before PICS can be calculated.

Parameters:

Name Type Description Default
associations StudyLocus

Study locus to finemap using PICS

required
k float

Empiric constant that can be adjusted to fit the curve, 6.4 recommended.

6.4

Returns:

Name Type Description
StudyLocus StudyLocus

Study locus with PICS results

Source code in src/gentropy/method/pics.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
@classmethod
def finemap(
    cls: type[PICS], associations: StudyLocus, k: float = 6.4
) -> StudyLocus:
    """Run PICS on a study locus.

    !!! info "Study locus needs to be LD annotated"

        The study locus needs to be LD annotated before PICS can be calculated.

    Args:
        associations (StudyLocus): Study locus to finemap using PICS
        k (float): Empiric constant that can be adjusted to fit the curve, 6.4 recommended.

    Returns:
        StudyLocus: Study locus with PICS results
    """
    # Finemapping method is an optional column:
    finemapping_method_expression = (
        f.lit(FinemappingMethod.PICS.value)
        if "finemappingMethod" not in associations.df.columns
        else f.coalesce(
            f.col("finemappingMethod"), f.lit(FinemappingMethod.PICS.value)
        )
    )

    # Registering the UDF to be used in the pipeline:
    finemap_udf = f.udf(
        lambda ld_set, neglog_p: cls._finemap(ld_set, neglog_p, k),
        cls.PICSED_LOCUS_SCHEMA,
    )

    return StudyLocus(
        _df=(
            associations.df
            # Old locus column will be dropped if available
            .select(*[col for col in associations.df.columns if col != "locus"])
            # Estimate neglog_pvalue for the lead variant
            .withColumn("neglog_pvalue", associations.neglog_pvalue())
            # New locus containing the PICS results
            .withColumn(
                "locus",
                f.when(
                    f.col("ldSet").isNotNull(),
                    finemap_udf(f.col("ldSet"), f.col("neglog_pvalue")),
                ),
            )
            # Updating single point statistics in the locus object for the lead variant:
            .withColumn(
                "locus",
                f.transform(
                    f.col("locus"),
                    lambda tag: f.when(
                        f.col("variantId") == tag["variantId"],
                        tag.withField("pValueMantissa", f.col("pValueMantissa"))
                        .withField("pValueExponent", f.col("pValueExponent"))
                        .withField("beta", f.col("beta")),
                    ).otherwise(
                        tag.withField(
                            "pValueMantissa", f.lit(None).cast(t.FloatType())
                        )
                        .withField(
                            "pValueExponent", f.lit(None).cast(t.IntegerType())
                        )
                        .withField("beta", f.lit(None).cast(t.DoubleType()))
                    ),
                ),
            )
            # Flagging all PICS loci with OUT_OF_SAMPLE_LD flag:
            .withColumn(
                "qualityControls",
                StudyLocus.update_quality_flag(
                    f.col("qualityControls"),
                    f.lit(True),
                    StudyLocusQualityCheck.OUT_OF_SAMPLE_LD,
                ),
            )
            .withColumn(
                "finemappingMethod",
                finemapping_method_expression,
            )
            .withColumn(
                "studyLocusId",
                StudyLocus.assign_study_locus_id(
                    ["studyId", "variantId", "finemappingMethod"]
                ),
            )
            .drop("neglog_pvalue")
        ),
        _schema=StudyLocus.get_schema(),
    )