Skip to content

FinnGen

Bases: FinnGenStepConfig

FinnGen study table ingestion step.

Source code in src/otg/finngen.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
@dataclass
class FinnGenStep(FinnGenStepConfig):
    """FinnGen study table ingestion step.

    Fetches the FinnGen phenotype table (JSON) from
    ``finngen_phenotype_table_url``, loads it into a Spark DataFrame, converts
    it with ``StudyIndexFinnGen.from_source`` and writes the result as Parquet
    to ``finngen_study_index_out``.

    Attributes:
        session (Session): Provides the Spark handle (``session.spark``) and
            the output write mode (``session.write_mode``) used by ``run``.
    """

    # NOTE(review): this default is evaluated once, when the class body runs,
    # so every instance that is not given its own session shares this object.
    session: Session = Session()

    def run(self: FinnGenStep) -> None:
        """Run FinnGen study table ingestion step.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                phenotype table URL cannot be opened.
        """
        # Read the JSON data from the URL. The context manager guarantees the
        # response is closed even if reading or decoding fails.
        with urlopen(self.finngen_phenotype_table_url) as response:
            json_data = response.read().decode("utf-8")

        # Hand the whole payload to Spark as a single-element RDD so the JSON
        # reader can parse it from memory.
        rdd = self.session.spark.sparkContext.parallelize([json_data])
        df = self.session.spark.read.json(rdd)

        # Parse the study index data.
        finngen_studies = StudyIndexFinnGen.from_source(
            df,
            self.finngen_release_prefix,
            self.finngen_sumstat_url_prefix,
            self.finngen_sumstat_url_suffix,
        )

        # Write the output.
        finngen_studies.df.write.mode(self.session.write_mode).parquet(
            self.finngen_study_index_out
        )

run()

Run FinnGen study table ingestion step.

Source code in src/otg/finngen.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def run(self: FinnGenStep) -> None:
    """Run FinnGen study table ingestion step.

    Fetches the phenotype table JSON from ``self.finngen_phenotype_table_url``,
    loads it into a Spark DataFrame, converts it with
    ``StudyIndexFinnGen.from_source`` and writes the result as Parquet to
    ``self.finngen_study_index_out`` using ``self.session.write_mode``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the phenotype
            table URL cannot be opened.
    """
    # Read the JSON data from the URL. The context manager guarantees the
    # response is closed even if reading or decoding fails.
    with urlopen(self.finngen_phenotype_table_url) as response:
        json_data = response.read().decode("utf-8")

    # Hand the whole payload to Spark as a single-element RDD so the JSON
    # reader can parse it from memory.
    rdd = self.session.spark.sparkContext.parallelize([json_data])
    df = self.session.spark.read.json(rdd)

    # Parse the study index data.
    finngen_studies = StudyIndexFinnGen.from_source(
        df,
        self.finngen_release_prefix,
        self.finngen_sumstat_url_prefix,
        self.finngen_sumstat_url_suffix,
    )

    # Write the output.
    finngen_studies.df.write.mode(self.session.write_mode).parquet(
        self.finngen_study_index_out
    )

FinnGen study table ingestion step requirements.

Attributes:

Name Type Description
finngen_phenotype_table_url str

FinnGen API for fetching the list of studies.

finngen_release_prefix str

Release prefix pattern.

finngen_sumstat_url_prefix str

URL prefix for summary statistics location.

finngen_sumstat_url_suffix str

URL suffix for summary statistics location.

finngen_study_index_out str

Output path for the FinnGen study index dataset.

Source code in src/otg/config.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
@dataclass
class FinnGenStepConfig:
    """FinnGen study table ingestion step requirements.

    Every field except ``_target_`` defaults to ``MISSING`` (presumably marking
    it as a mandatory value to be supplied by configuration; confirm).

    Attributes:
        finngen_phenotype_table_url (str): FinnGen API for fetching the list of studies.
        finngen_release_prefix (str): Release prefix pattern.
        finngen_sumstat_url_prefix (str): URL prefix for summary statistics location.
        finngen_sumstat_url_suffix (str): URL suffix for summary statistics location.
        finngen_study_index_out (str): Output path for the FinnGen study index dataset.
    """

    # Dotted import path of the step class this configuration belongs to
    # (presumably consumed by the config framework to instantiate it; confirm).
    _target_: str = "otg.finngen.FinnGenStep"
    # Where the list of FinnGen studies is fetched from.
    finngen_phenotype_table_url: str = MISSING
    # Release prefix pattern.
    finngen_release_prefix: str = MISSING
    # Leading and trailing parts of the summary statistics location URL.
    finngen_sumstat_url_prefix: str = MISSING
    finngen_sumstat_url_suffix: str = MISSING
    # Destination path of the generated study index dataset.
    finngen_study_index_out: str = MISSING