Skip to content

FinnGen Meta Analysis Study Index

gentropy.datasource.finngen_meta.study_index.FinnGenMetaStudyIndex

FinnGen meta-analysis study index.

Source code in src/gentropy/datasource/finngen_meta/study_index.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
class FinnGenMetaStudyIndex:
    """Builder of the study index for the FinnGen meta-analysis data sources."""

    @classmethod
    def get_constants(cls) -> dict[str, dict[str, Column]]:
        """Provide the constant study index columns for every meta-analysis source.

        The key order inside each entry is kept as is, because
        `from_finngen_manifest` iterates over the items and emits the columns
        in that order.

        Returns:
            dict[str, dict[str, Column]]: Constants for each meta-analysis data source.
        """
        # Sample size based on https://metaresults-ukbb.finngen.fi/about
        finngen_ukbb_constants = {
            "initialSampleSize": f.lit(
                "920,880 (FinnGenR12: nNFE=500,349; pan-UKBB-EUR: nEUR=420,531)"
            ),
            "cohorts": f.array(f.lit("FinnGen"), f.lit("pan-UKBB-EUR")),
            "publicationDate": f.lit("2024-11-01"),
        }

        # Sample size based on https://mvp-ukbb.finngen.fi/about
        finngen_ukbb_mvp_constants = {
            "initialSampleSize": f.lit(
                "1,550,147 (MVP: nEUR=449,042, nAFR=121,177, nAMR=59,048; FinnGenR12: nNFE=500,349; pan-UKBB-EUR: nEUR=420,531)"
            ),
            "publicationDate": f.lit("2024-11-01"),
            "cohorts": f.array(f.lit("MVP"), f.lit("FinnGen"), f.lit("pan-UKBB-EUR")),
        }

        return {
            MetaAnalysisDataSource.FINNGEN_UKBB.value: finngen_ukbb_constants,
            MetaAnalysisDataSource.FINNGEN_UKBB_MVP.value: finngen_ukbb_mvp_constants,
        }

    @classmethod
    def from_finngen_manifest(
        cls: type[FinnGenMetaStudyIndex],
        manifest: FinnGenMetaManifest,
        efo_mapping: EFOMapping,
    ) -> StudyIndex:
        """Build the FinnGen meta-analysis study index out of the manifest.

        Args:
            manifest (FinnGenMetaManifest): FinnGen meta-analysis manifest.
            efo_mapping (EFOMapping): EFO mapping data source.

        Returns:
            StudyIndex: FinnGen meta-analysis study index.
        """
        # Columns carried over from the manifest, plus the fixed study type.
        manifest_columns = [
            f.col("studyId"),
            f.col("projectId"),
            f.lit("gwas").alias("studyType"),
            f.col("traitFromSource"),
            f.col("hasSumstats"),
            f.col("summarystatsLocation"),
            f.col("discoverySamples"),
            f.col("nSamples"),
            f.col("nCases"),
            f.col("nControls"),
        ]

        # Constant columns registered for the data source of this manifest.
        source_constants = cls.get_constants()[manifest.meta.value]
        constant_columns = [
            column.alias(name) for name, column in source_constants.items()
        ]

        # `ldPopulationStructure` is derived from the discovery samples.
        ld_population_structure = StudyIndex.aggregate_and_map_ancestries(
            f.col("discoverySamples")
        ).alias("ldPopulationStructure")

        selected = manifest.df.select(
            *manifest_columns,
            *constant_columns,
            ld_population_structure,
        )

        # Wrap into a study index and add EFO mappings - `traitFromSourceMappedIds`.
        annotated = efo_mapping.annotate_study_index(
            StudyIndex(_df=selected), finngen_release="R12"
        )

        # Coalesce to a single partition, so the index ends up in a single file.
        single_partition_df = annotated.df.coalesce(1)
        return StudyIndex(_df=single_partition_df, _schema=StudyIndex.get_schema())

from_finngen_manifest(manifest: FinnGenMetaManifest, efo_mapping: EFOMapping) -> StudyIndex classmethod

Create the FinnGen meta-analysis study index from the manifest.

Parameters:

Name Type Description Default
manifest FinnGenMetaManifest

FinnGen meta-analysis manifest.

required
efo_mapping EFOMapping

EFO mapping data source.

required

Returns:

Name Type Description
StudyIndex StudyIndex

FinnGen meta-analysis study index.

Source code in src/gentropy/datasource/finngen_meta/study_index.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
@classmethod
def from_finngen_manifest(
    cls: type[FinnGenMetaStudyIndex],
    manifest: FinnGenMetaManifest,
    efo_mapping: EFOMapping,
) -> StudyIndex:
    """Build the FinnGen meta-analysis study index out of the manifest.

    Args:
        manifest (FinnGenMetaManifest): FinnGen meta-analysis manifest.
        efo_mapping (EFOMapping): EFO mapping data source.

    Returns:
        StudyIndex: FinnGen meta-analysis study index.
    """
    # Columns carried over from the manifest, plus the fixed study type.
    manifest_columns = [
        f.col("studyId"),
        f.col("projectId"),
        f.lit("gwas").alias("studyType"),
        f.col("traitFromSource"),
        f.col("hasSumstats"),
        f.col("summarystatsLocation"),
        f.col("discoverySamples"),
        f.col("nSamples"),
        f.col("nCases"),
        f.col("nControls"),
    ]

    # Constant columns registered for the data source of this manifest.
    source_constants = cls.get_constants()[manifest.meta.value]
    constant_columns = [
        column.alias(name) for name, column in source_constants.items()
    ]

    # `ldPopulationStructure` is derived from the discovery samples.
    ld_population_structure = StudyIndex.aggregate_and_map_ancestries(
        f.col("discoverySamples")
    ).alias("ldPopulationStructure")

    selected = manifest.df.select(
        *manifest_columns,
        *constant_columns,
        ld_population_structure,
    )

    # Wrap into a study index and add EFO mappings - `traitFromSourceMappedIds`.
    annotated = efo_mapping.annotate_study_index(
        StudyIndex(_df=selected), finngen_release="R12"
    )

    # Coalesce to a single partition, so the index ends up in a single file.
    single_partition_df = annotated.df.coalesce(1)
    return StudyIndex(_df=single_partition_df, _schema=StudyIndex.get_schema())

get_constants() -> dict[str, dict[str, Column]] classmethod

Get constants for FinnGen meta-analysis study index.

Returns:

Type Description
dict[str, dict[str, Column]]

dict[str, dict[str, Column]]: Constants for each meta-analysis data source.

Source code in src/gentropy/datasource/finngen_meta/study_index.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@classmethod
def get_constants(cls) -> dict[str, dict[str, Column]]:
    """Provide the constant study index columns for every meta-analysis source.

    The key order inside each entry is kept as is, since any consumer that
    iterates over the items receives the columns in that order.

    Returns:
        dict[str, dict[str, Column]]: Constants for each meta-analysis data source.
    """
    # Sample size based on https://metaresults-ukbb.finngen.fi/about
    finngen_ukbb_constants = {
        "initialSampleSize": f.lit(
            "920,880 (FinnGenR12: nNFE=500,349; pan-UKBB-EUR: nEUR=420,531)"
        ),
        "cohorts": f.array(f.lit("FinnGen"), f.lit("pan-UKBB-EUR")),
        "publicationDate": f.lit("2024-11-01"),
    }

    # Sample size based on https://mvp-ukbb.finngen.fi/about
    finngen_ukbb_mvp_constants = {
        "initialSampleSize": f.lit(
            "1,550,147 (MVP: nEUR=449,042, nAFR=121,177, nAMR=59,048; FinnGenR12: nNFE=500,349; pan-UKBB-EUR: nEUR=420,531)"
        ),
        "publicationDate": f.lit("2024-11-01"),
        "cohorts": f.array(f.lit("MVP"), f.lit("FinnGen"), f.lit("pan-UKBB-EUR")),
    }

    return {
        MetaAnalysisDataSource.FINNGEN_UKBB.value: finngen_ukbb_constants,
        MetaAnalysisDataSource.FINNGEN_UKBB_MVP.value: finngen_ukbb_mvp_constants,
    }