Skip to content

From distance

List of features

gentropy.dataset.l2g_features.distance.DistanceSentinelTssFeature dataclass

Bases: L2GFeature

Distance of the sentinel variant to gene TSS. This is not weighted by the causal probability.

Source code in src/gentropy/dataset/l2g_features/distance.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
class DistanceSentinelTssFeature(L2GFeature):
    """Distance from the sentinel variant to the gene TSS, without weighting by the causal probability."""

    feature_dependency_type = VariantIndex
    feature_name = "distanceSentinelTss"

    @classmethod
    def compute(
        cls: type[DistanceSentinelTssFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceSentinelTssFeature:
        """Build the `distanceSentinelTss` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceSentinelTssFeature: Feature dataset
        """
        # Per (studyLocusId, geneId) scores derived from the TSS distance.
        wide_df = common_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromTss",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceSentinelTssFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceSentinelTssFeature DistanceSentinelTssFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
@classmethod
def compute(
    cls: type[DistanceSentinelTssFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceSentinelTssFeature:
    """Build the feature dataset from the sentinel-to-TSS distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceSentinelTssFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromTss" distance type.
    scores_wide = common_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromTss",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceSentinelTssNeighbourhoodFeature dataclass

Bases: L2GFeature

Distance between the sentinel variant and a gene TSS as a relation of the distance with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.

Source code in src/gentropy/dataset/l2g_features/distance.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
class DistanceSentinelTssNeighbourhoodFeature(L2GFeature):
    """Sentinel-variant-to-TSS distance of a gene in relation to the distances of all the genes in the vicinity of a studyLocus. Not weighted by the causal probability."""

    feature_dependency_type = [VariantIndex, GeneIndex]
    feature_name = "distanceSentinelTssNeighbourhood"

    @classmethod
    def compute(
        cls: type[DistanceSentinelTssNeighbourhoodFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceSentinelTssNeighbourhoodFeature:
        """Build the `distanceSentinelTssNeighbourhood` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceSentinelTssNeighbourhoodFeature: Feature dataset
        """
        # Neighbourhood scores are derived from the TSS distance.
        wide_df = common_neighbourhood_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromTss",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceSentinelTssNeighbourhoodFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceSentinelTssNeighbourhoodFeature DistanceSentinelTssNeighbourhoodFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
@classmethod
def compute(
    cls: type[DistanceSentinelTssNeighbourhoodFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceSentinelTssNeighbourhoodFeature:
    """Build the neighbourhood feature dataset from the sentinel-to-TSS distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceSentinelTssNeighbourhoodFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromTss" distance type.
    scores_wide = common_neighbourhood_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromTss",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceTssMeanFeature dataclass

Bases: L2GFeature

Average distance of all tagging variants to gene TSS.

Source code in src/gentropy/dataset/l2g_features/distance.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
class DistanceTssMeanFeature(L2GFeature):
    """Mean distance to the gene TSS across all tagging variants."""

    feature_dependency_type = VariantIndex
    feature_name = "distanceTssMean"

    @classmethod
    def compute(
        cls: type[DistanceTssMeanFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceTssMeanFeature:
        """Build the `distanceTssMean` feature dataset, flooring negative scores at zero.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceTssMeanFeature: Feature dataset
        """
        raw_df = common_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromTss",
            **feature_dependency,
        )
        # Negative scores become 0.0; every other value (nulls included) is kept as is.
        score = f.col(cls.feature_name)
        floored_score = f.when(score < 0, f.lit(0.0)).otherwise(score)
        long_df = convert_from_wide_to_long(
            raw_df.withColumn(cls.feature_name, floored_score),
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceTssMeanFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceTssMeanFeature DistanceTssMeanFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
@classmethod
def compute(
    cls: type[DistanceTssMeanFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceTssMeanFeature:
    """Build the feature dataset from the TSS distance, flooring negative scores at zero.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceTssMeanFeature: Feature dataset
    """
    scores_wide = common_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromTss",
        **feature_dependency,
    )
    # Replace scores below zero with 0.0 and leave all other values untouched.
    feature_col = f.col(cls.feature_name)
    non_negative = f.when(feature_col < 0, f.lit(0.0)).otherwise(feature_col)
    scores_wide = scores_wide.withColumn(cls.feature_name, non_negative)
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceTssMeanNeighbourhoodFeature dataclass

Bases: L2GFeature

Minimum mean distance to TSS for all genes in the vicinity of a studyLocus.

Source code in src/gentropy/dataset/l2g_features/distance.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
class DistanceTssMeanNeighbourhoodFeature(L2GFeature):
    """Minimum mean distance to TSS across all the genes in the vicinity of a studyLocus."""

    feature_dependency_type = [VariantIndex, GeneIndex]
    feature_name = "distanceTssMeanNeighbourhood"

    @classmethod
    def compute(
        cls: type[DistanceTssMeanNeighbourhoodFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceTssMeanNeighbourhoodFeature:
        """Build the `distanceTssMeanNeighbourhood` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceTssMeanNeighbourhoodFeature: Feature dataset
        """
        # Neighbourhood scores are derived from the TSS distance.
        wide_df = common_neighbourhood_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromTss",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceTssMeanNeighbourhoodFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceTssMeanNeighbourhoodFeature DistanceTssMeanNeighbourhoodFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
@classmethod
def compute(
    cls: type[DistanceTssMeanNeighbourhoodFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceTssMeanNeighbourhoodFeature:
    """Build the neighbourhood feature dataset from the mean TSS distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceTssMeanNeighbourhoodFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromTss" distance type.
    scores_wide = common_neighbourhood_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromTss",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceSentinelFootprintFeature dataclass

Bases: L2GFeature

Distance between the sentinel variant and the footprint of a gene.

Source code in src/gentropy/dataset/l2g_features/distance.py
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
class DistanceSentinelFootprintFeature(L2GFeature):
    """Distance from the sentinel variant to the footprint of a gene."""

    feature_dependency_type = VariantIndex
    feature_name = "distanceSentinelFootprint"

    @classmethod
    def compute(
        cls: type[DistanceSentinelFootprintFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceSentinelFootprintFeature:
        """Build the `distanceSentinelFootprint` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceSentinelFootprintFeature: Feature dataset
        """
        # Per (studyLocusId, geneId) scores derived from the footprint distance.
        wide_df = common_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromFootprint",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceSentinelFootprintFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceSentinelFootprintFeature DistanceSentinelFootprintFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
@classmethod
def compute(
    cls: type[DistanceSentinelFootprintFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceSentinelFootprintFeature:
    """Build the feature dataset from the sentinel-to-footprint distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceSentinelFootprintFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromFootprint" distance type.
    scores_wide = common_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromFootprint",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceSentinelFootprintNeighbourhoodFeature dataclass

Bases: L2GFeature

Distance between the sentinel variant and a gene footprint as a relation of the distance with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.

Source code in src/gentropy/dataset/l2g_features/distance.py
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature):
    """Sentinel-variant-to-footprint distance of a gene in relation to the distances of all the genes in the vicinity of a studyLocus. Not weighted by the causal probability."""

    feature_dependency_type = [VariantIndex, GeneIndex]
    feature_name = "distanceSentinelFootprintNeighbourhood"

    @classmethod
    def compute(
        cls: type[DistanceSentinelFootprintNeighbourhoodFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceSentinelFootprintNeighbourhoodFeature:
        """Build the `distanceSentinelFootprintNeighbourhood` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceSentinelFootprintNeighbourhoodFeature: Feature dataset
        """
        # Neighbourhood scores are derived from the footprint distance.
        wide_df = common_neighbourhood_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromFootprint",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceSentinelFootprintNeighbourhoodFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceSentinelFootprintNeighbourhoodFeature DistanceSentinelFootprintNeighbourhoodFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
@classmethod
def compute(
    cls: type[DistanceSentinelFootprintNeighbourhoodFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceSentinelFootprintNeighbourhoodFeature:
    """Build the neighbourhood feature dataset from the sentinel-to-footprint distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceSentinelFootprintNeighbourhoodFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromFootprint" distance type.
    scores_wide = common_neighbourhood_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromFootprint",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceFootprintMeanFeature dataclass

Bases: L2GFeature

Average distance of all tagging variants to the footprint of a gene.

Source code in src/gentropy/dataset/l2g_features/distance.py
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
class DistanceFootprintMeanFeature(L2GFeature):
    """Mean distance to the footprint of a gene across all tagging variants."""

    feature_dependency_type = VariantIndex
    feature_name = "distanceFootprintMean"

    @classmethod
    def compute(
        cls: type[DistanceFootprintMeanFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceFootprintMeanFeature:
        """Build the `distanceFootprintMean` feature dataset, flooring negative scores at zero.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceFootprintMeanFeature: Feature dataset
        """
        raw_df = common_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromFootprint",
            **feature_dependency,
        )
        # Negative scores become 0.0; every other value (nulls included) is kept as is.
        score = f.col(cls.feature_name)
        floored_score = f.when(score < 0, f.lit(0.0)).otherwise(score)
        long_df = convert_from_wide_to_long(
            raw_df.withColumn(cls.feature_name, floored_score),
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceFootprintMeanFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceFootprintMeanFeature DistanceFootprintMeanFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
@classmethod
def compute(
    cls: type[DistanceFootprintMeanFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceFootprintMeanFeature:
    """Build the feature dataset from the footprint distance, flooring negative scores at zero.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceFootprintMeanFeature: Feature dataset
    """
    scores_wide = common_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromFootprint",
        **feature_dependency,
    )
    # Replace scores below zero with 0.0 and leave all other values untouched.
    feature_col = f.col(cls.feature_name)
    non_negative = f.when(feature_col < 0, f.lit(0.0)).otherwise(feature_col)
    scores_wide = scores_wide.withColumn(cls.feature_name, non_negative)
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

gentropy.dataset.l2g_features.distance.DistanceFootprintMeanNeighbourhoodFeature dataclass

Bases: L2GFeature

Minimum mean distance to footprint for all genes in the vicinity of a studyLocus.

Source code in src/gentropy/dataset/l2g_features/distance.py
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
class DistanceFootprintMeanNeighbourhoodFeature(L2GFeature):
    """Minimum mean distance to footprint across all the genes in the vicinity of a studyLocus."""

    feature_dependency_type = [VariantIndex, GeneIndex]
    feature_name = "distanceFootprintMeanNeighbourhood"

    @classmethod
    def compute(
        cls: type[DistanceFootprintMeanNeighbourhoodFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: dict[str, Any],
    ) -> DistanceFootprintMeanNeighbourhoodFeature:
        """Build the `distanceFootprintMeanNeighbourhood` feature dataset.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
            feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

        Returns:
            DistanceFootprintMeanNeighbourhoodFeature: Feature dataset
        """
        # Neighbourhood scores are derived from the footprint distance.
        wide_df = common_neighbourhood_distance_feature_logic(
            study_loci_to_annotate,
            feature_name=cls.feature_name,
            distance_type="distanceFromFootprint",
            **feature_dependency,
        )
        # Reshape so the feature name and its value live in dedicated columns.
        long_df = convert_from_wide_to_long(
            wide_df,
            id_vars=("studyLocusId", "geneId"),
            var_name="featureName",
            value_name="featureValue",
        )
        return cls(_df=long_df, _schema=cls.get_schema())

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: dict[str, Any]) -> DistanceFootprintMeanNeighbourhoodFeature classmethod

Computes the feature.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency dict[str, Any]

Dataset that contains the distance information

required

Returns:

Name Type Description
DistanceFootprintMeanNeighbourhoodFeature DistanceFootprintMeanNeighbourhoodFeature

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
@classmethod
def compute(
    cls: type[DistanceFootprintMeanNeighbourhoodFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: dict[str, Any],
) -> DistanceFootprintMeanNeighbourhoodFeature:
    """Build the neighbourhood feature dataset from the mean footprint distance.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study loci for which the feature is calculated
        feature_dependency (dict[str, Any]): Keyword arguments unpacked into `common_neighbourhood_distance_feature_logic`; they supply the distance information

    Returns:
        DistanceFootprintMeanNeighbourhoodFeature: Feature dataset
    """
    # Scores are computed against the "distanceFromFootprint" distance type.
    scores_wide = common_neighbourhood_distance_feature_logic(
        study_loci_to_annotate,
        feature_name=cls.feature_name,
        distance_type="distanceFromFootprint",
        **feature_dependency,
    )
    scores_long = convert_from_wide_to_long(
        scores_wide,
        id_vars=("studyLocusId", "geneId"),
        var_name="featureName",
        value_name="featureValue",
    )
    return cls(_df=scores_long, _schema=cls.get_schema())

Common logic

gentropy.dataset.l2g_features.distance.common_distance_feature_logic(study_loci_to_annotate: StudyLocus | L2GGoldStandard, *, variant_index: VariantIndex, feature_name: str, distance_type: str, genomic_window: int = 500000) -> DataFrame

Calculate the distance feature that correlates a variant in a credible set with a gene.

The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait when we look at the average distance score for all variants in the credible set.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
variant_index VariantIndex

The dataset containing distance to gene information

required
feature_name str

The name of the feature

required
distance_type str

The type of distance to gene

required
genomic_window int

The maximum window size to consider

500000

Returns:

Name Type Description
DataFrame DataFrame

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def common_distance_feature_logic(
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    *,
    variant_index: VariantIndex,
    feature_name: str,
    distance_type: str,
    genomic_window: int = 500_000,
) -> DataFrame:
    """Calculate the distance feature that correlates a variant in a credible set with a gene.

    Two modes are supported, selected by the content of `feature_name` ("Mean" takes precedence when both substrings are present):

    - "Mean": every variant in the `locus` array contributes `(genomic_window - distance + 1) * posteriorProbability` and the contributions are summed per study locus and gene.
    - "Sentinel": only the `variantId` column of the study locus is used and the score is the unweighted `genomic_window - distance + 1`.

    The aggregated score is then rescaled as `log10(score) / log10(genomic_window + 1)`.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
        variant_index (VariantIndex): The dataset containing distance to gene information
        feature_name (str): The name of the feature; it must contain either "Mean" or "Sentinel"
        distance_type (str): The type of distance to gene
        genomic_window (int): The maximum window size to consider

    Returns:
        DataFrame: Feature dataset with the columns `studyLocusId`, `geneId` and one column named after `feature_name`

    Raises:
        ValueError: If `feature_name` contains neither "Mean" nor "Sentinel"
    """
    # Validate up front: without this check an unsupported name would reach the
    # final expression with `df`, `distance_score_expr` and `agg_expr` unassigned
    # and fail with an opaque UnboundLocalError.
    weight_by_posterior = "Mean" in feature_name
    if not weight_by_posterior and "Sentinel" not in feature_name:
        raise ValueError(
            f"Unsupported distance feature name '{feature_name}': "
            "expected it to contain either 'Mean' or 'Sentinel'."
        )

    # NOTE(review): the result is expected to expose `variantId`, `targetId` and a
    # column named after `distance_type`, since those are the columns used below.
    distances_dataset = variant_index.get_distance_to_gene(distance_type=distance_type)

    if weight_by_posterior:
        # Weighting by the SNP contribution is only applied when we are averaging all distances
        df = study_loci_to_annotate.df.withColumn(
            "variantInLocus", f.explode_outer("locus")
        ).select(
            "studyLocusId",
            f.col("variantInLocus.variantId").alias("variantId"),
            f.col("variantInLocus.posteriorProbability").alias("posteriorProbability"),
        )
        distance_score_expr = (
            f.lit(genomic_window) - f.col(distance_type) + f.lit(1)
        ) * f.col("posteriorProbability")
        agg_expr = f.sum(f.col("distance_score"))
    else:
        # Sentinel mode: unweighted distance between the sentinel (lead) variant and the gene.
        df = study_loci_to_annotate.df.select("studyLocusId", "variantId")
        distance_score_expr = f.lit(genomic_window) - f.col(distance_type) + f.lit(1)
        agg_expr = f.first(f.col("distance_score"))

    return (
        df.join(
            distances_dataset.withColumnRenamed("targetId", "geneId"),
            on="variantId",
            how="inner",
        )
        .withColumn(
            "distance_score",
            distance_score_expr,
        )
        .groupBy("studyLocusId", "geneId")
        .agg(agg_expr.alias("distance_score_agg"))
        .withColumn(
            feature_name,
            # Rescale so that a raw score of `genomic_window + 1` (distance 0, full weight) maps to 1.
            f.log10(f.col("distance_score_agg")) / f.log10(f.lit(genomic_window + 1)),
        )
        .drop("distance_score_agg")
    )

gentropy.dataset.l2g_features.distance.common_neighbourhood_distance_feature_logic(study_loci_to_annotate: StudyLocus | L2GGoldStandard, *, variant_index: VariantIndex, feature_name: str, distance_type: str, gene_index: GeneIndex, genomic_window: int = 500000) -> DataFrame

Calculate the distance feature that correlates any variant in a credible set with any protein-coding gene near the locus. The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
variant_index VariantIndex

The dataset containing distance to gene information

required
feature_name str

The name of the feature

required
distance_type str

The type of distance to gene

required
gene_index GeneIndex

The dataset containing gene information

required
genomic_window int

The maximum window size to consider

500000

Returns:

Name Type Description
DataFrame DataFrame

Feature dataset

Source code in src/gentropy/dataset/l2g_features/distance.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def common_neighbourhood_distance_feature_logic(
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    *,
    variant_index: VariantIndex,
    feature_name: str,
    distance_type: str,
    gene_index: GeneIndex,
    genomic_window: int = 500_000,
) -> DataFrame:
    """Express each gene's distance feature relative to the best protein-coding gene in the same study locus.

    The per-gene feature is first computed with `common_distance_feature_logic`, using `feature_name` with the "Neighbourhood" substring removed. Only genes whose `biotype` is "protein_coding" are kept. Each value is then divided by the maximum value observed within its `studyLocusId` and the ratio is clamped to the [0, 1] range.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
        variant_index (VariantIndex): The dataset containing distance to gene information
        feature_name (str): The name of the feature
        distance_type (str): The type of distance to gene
        gene_index (GeneIndex): The dataset containing gene information
        genomic_window (int): The maximum window size to consider

    Returns:
        DataFrame: Feature dataset
    """
    local_feature_name = feature_name.replace("Neighbourhood", "")

    # Per-gene (non-neighbourhood) metric that the regional ratio is built from.
    per_gene_scores = common_distance_feature_logic(
        study_loci_to_annotate,
        variant_index=variant_index,
        feature_name=local_feature_name,
        distance_type=distance_type,
        genomic_window=genomic_window,
    )

    protein_coding_genes = gene_index.df.filter(
        f.col("biotype") == "protein_coding"
    ).select("geneId")

    # Highest per-gene value among the genes retained for the same study locus.
    locus_window = Window.partitionBy("studyLocusId")
    regional_max_expr = f.max(local_feature_name).over(locus_window)

    regional_max = f.col("regional_max")
    has_usable_max = (regional_max.isNotNull()) & (regional_max != 0.0)
    # A missing or zero regional maximum yields 0.0 instead of dividing by it.
    ratio_expr = f.when(
        has_usable_max,
        f.col(local_feature_name) / f.coalesce(regional_max, f.lit(0.0)),
    ).otherwise(f.lit(0.0))

    # Clamp the ratio into the [0, 1] interval.
    ratio = f.col(feature_name)
    clamped_expr = (
        f.when(ratio < 0, f.lit(0.0))
        .when(ratio > 1, f.lit(1.0))
        .otherwise(ratio)
    )

    annotated = per_gene_scores.join(protein_coding_genes, "geneId", "inner")
    annotated = annotated.withColumn("regional_max", regional_max_expr)
    annotated = annotated.withColumn(feature_name, ratio_expr)
    annotated = annotated.withColumn(feature_name, clamped_expr)
    return annotated.drop("regional_max", local_feature_name)