Skip to content

gene_index

gentropy.gene_index.GeneIndexStep

Gene index step.

This step generates a gene index dataset from an Open Targets Platform target dataset.

Source code in src/gentropy/gene_index.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class GeneIndexStep:
    """Step that builds the gene index.

    The whole pipeline runs inside the constructor: the Open Targets Platform
    target dataset is read from parquet, converted with
    ``OpenTargetsTarget.as_gene_index`` and the result is written back out as
    parquet.
    """

    def __init__(
        self,
        session: Session,
        target_path: str,
        gene_index_path: str,
    ) -> None:
        """Run the gene index step.

        Args:
            session (Session): Session object providing the Spark handle, the
                number of output partitions and the write mode.
            target_path (str): Input Open Targets Platform target dataset path.
            gene_index_path (str): Output gene index dataset path.
        """
        # Extract: load the platform target dataset.
        targets_df = session.spark.read.parquet(target_path)

        # Transform: derive the gene index from the target dataset.
        index = OpenTargetsTarget.as_gene_index(targets_df)

        # Load: coalesce to the configured partition count, then persist
        # using the session's write mode.
        coalesced_df = index.df.coalesce(session.output_partitions)
        writer = coalesced_df.write.mode(session.write_mode)
        writer.parquet(gene_index_path)

__init__(session: Session, target_path: str, gene_index_path: str) -> None

Initialize step.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `session` | `Session` | Session object. | required |
| `target_path` | `str` | Input Open Targets Platform target dataset path. | required |
| `gene_index_path` | `str` | Output gene index dataset path. | required |
Source code in src/gentropy/gene_index.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(
    self,
    session: Session,
    target_path: str,
    gene_index_path: str,
) -> None:
    """Run the gene index step: read targets, convert, write the index.

    Args:
        session (Session): Session object providing the Spark handle, the
            number of output partitions and the write mode.
        target_path (str): Input Open Targets Platform target dataset path.
        gene_index_path (str): Output gene index dataset path.
    """
    # Extract
    source_df = session.spark.read.parquet(target_path)

    # Transform
    gene_index_dataset = OpenTargetsTarget.as_gene_index(source_df)

    # Load: reduce to the configured number of partitions before writing.
    output_df = gene_index_dataset.df.coalesce(session.output_partitions)
    parquet_writer = output_df.write.mode(session.write_mode)
    parquet_writer.parquet(gene_index_path)