Skip to content

biosample_index

gentropy.biosample_index.BiosampleIndexStep

Biosample index step.

This step generates a Biosample index dataset from several ontology sources. Cell Ontology, Uberon and EFO inputs are currently read.

Source code in src/gentropy/biosample_index.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class BiosampleIndexStep:
    """Build the biosample index from ontology inputs and persist it as parquet.

    Three ontology JSON inputs are read: Cell Ontology, Uberon and EFO. The EFO
    index is passed through ``retain_rows_with_ancestor_id(["CL_0000000"])``
    before the three indices are merged into a single index.
    """

    def __init__(
        self,
        session: Session,
        cell_ontology_input_path: str,
        uberon_input_path: str,
        efo_input_path: str,
        biosample_index_path: str,
    ) -> None:
        """Generate the biosample index and write it to ``biosample_index_path``.

        Args:
            session (Session): Session object; provides the Spark handle, the output partition count and the write mode.
            cell_ontology_input_path (str): Input cell ontology dataset path.
            uberon_input_path (str): Input uberon dataset path.
            efo_input_path (str): Input efo dataset path.
            biosample_index_path (str): Output biosample index dataset path.
        """
        # Parse each ontology source into its own index.
        cl_index = extract_ontology_from_json(cell_ontology_input_path, session.spark)
        uberon = extract_ontology_from_json(uberon_input_path, session.spark)
        efo_full = extract_ontology_from_json(efo_input_path, session.spark)

        # Only the EFO index is filtered by ancestor before merging.
        # NOTE(review): CL_0000000 looks like the Cell Ontology root term - confirm.
        efo_subset = efo_full.retain_rows_with_ancestor_id(["CL_0000000"])

        # The Cell Ontology index is the base that the other two are merged into.
        merged_index = cl_index.merge_indices([uberon, efo_subset])

        # Write the merged index as parquet using the session's output settings.
        output_df = merged_index.df.coalesce(session.output_partitions)
        writer = output_df.write.mode(session.write_mode)
        writer.parquet(biosample_index_path)

__init__(session: Session, cell_ontology_input_path: str, uberon_input_path: str, efo_input_path: str, biosample_index_path: str) -> None

Run Biosample index generation step.

Parameters:

Name Type Description Default
session Session

Session object.

required
cell_ontology_input_path str

Input cell ontology dataset path.

required
uberon_input_path str

Input uberon dataset path.

required
efo_input_path str

Input efo dataset path.

required
biosample_index_path str

Output biosample index dataset path.

required
Source code in src/gentropy/biosample_index.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    session: Session,
    cell_ontology_input_path: str,
    uberon_input_path: str,
    efo_input_path: str,
    biosample_index_path: str,
) -> None:
    """Generate the biosample index and write it to ``biosample_index_path``.

    Args:
        session (Session): Session object; provides the Spark handle, the output partition count and the write mode.
        cell_ontology_input_path (str): Input cell ontology dataset path.
        uberon_input_path (str): Input uberon dataset path.
        efo_input_path (str): Input efo dataset path.
        biosample_index_path (str): Output biosample index dataset path.
    """
    # Parse each ontology source into its own index.
    cl_index = extract_ontology_from_json(cell_ontology_input_path, session.spark)
    uberon = extract_ontology_from_json(uberon_input_path, session.spark)
    efo_full = extract_ontology_from_json(efo_input_path, session.spark)

    # Only the EFO index is filtered by ancestor before merging.
    # NOTE(review): CL_0000000 looks like the Cell Ontology root term - confirm.
    efo_subset = efo_full.retain_rows_with_ancestor_id(["CL_0000000"])

    # The Cell Ontology index is the base that the other two are merged into.
    merged_index = cl_index.merge_indices([uberon, efo_subset])

    # Write the merged index as parquet using the session's output settings.
    output_df = merged_index.df.coalesce(session.output_partitions)
    writer = output_df.write.mode(session.write_mode)
    writer.parquet(biosample_index_path)