Skip to content

Study index

Bases: Dataset

Study index dataset.

A study index dataset captures all the metadata for all studies including GWAS and Molecular QTL.

Source code in src/otg/dataset/study_index.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@dataclass
class StudyIndex(Dataset):
    """Dataset of study-level metadata.

    The study index gathers the metadata of every study, covering both GWAS
    and molecular QTL studies.
    """

    @classmethod
    def get_schema(cls: type[StudyIndex]) -> StructType:
        """Return the schema expected for a StudyIndex dataset.

        Returns:
            StructType: The result of `parse_spark_schema` for `studies.json`.
        """
        schema_file = "studies.json"
        return parse_spark_schema(schema_file)

    def study_type_lut(self: StudyIndex) -> DataFrame:
        """Build a lookup table from study identifier to study type.

        Returns:
            DataFrame: The `studyId` and `studyType` columns selected from `self.df`.
        """
        lut_columns = ("studyId", "studyType")
        return self.df.select(*lut_columns)

get_schema() classmethod

Provides the schema for the StudyIndex dataset.

Source code in src/otg/dataset/study_index.py
32
33
34
35
@classmethod
def get_schema(cls: type[StudyIndex]) -> StructType:
    """Return the schema expected for a StudyIndex dataset.

    Returns:
        StructType: The result of `parse_spark_schema` for `studies.json`.
    """
    schema_file = "studies.json"
    return parse_spark_schema(schema_file)

study_type_lut()

Return a lookup table of study type.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| DataFrame | DataFrame | A dataframe containing `studyId` and `studyType` columns. |

Source code in src/otg/dataset/study_index.py
37
38
39
40
41
42
43
def study_type_lut(self: StudyIndex) -> DataFrame:
    """Build a lookup table from study identifier to study type.

    Returns:
        DataFrame: The `studyId` and `studyType` columns selected from `self.df`.
    """
    lut_columns = ("studyId", "studyType")
    return self.df.select(*lut_columns)

Schema

root
 |-- studyId: string (nullable = false)
 |-- projectId: string (nullable = false)
 |-- studyType: string (nullable = false)
 |-- traitFromSource: string (nullable = false)
 |-- traitFromSourceMappedIds: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- pubmedId: string (nullable = true)
 |-- publicationTitle: string (nullable = true)
 |-- publicationFirstAuthor: string (nullable = true)
 |-- publicationDate: string (nullable = true)
 |-- publicationJournal: string (nullable = true)
 |-- backgroundTraitFromSourceMappedIds: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- initialSampleSize: string (nullable = true)
 |-- nCases: long (nullable = true)
 |-- nControls: long (nullable = true)
 |-- nSamples: long (nullable = true)
 |-- discoverySamples: array (nullable = true)
 |    |-- element: struct (containsNull = false)
 |    |    |-- sampleSize: string (nullable = true)
 |    |    |-- ancestry: string (nullable = true)
 |-- replicationSamples: array (nullable = true)
 |    |-- element: struct (containsNull = false)
 |    |    |-- sampleSize: string (nullable = true)
 |    |    |-- ancestry: string (nullable = true)
 |-- summarystatsLocation: string (nullable = true)
 |-- hasSumstats: boolean (nullable = true)