Skip to content

L2g features

Abstract Class

gentropy.dataset.l2g_features.l2g_feature.L2GFeature dataclass

Bases: Dataset, ABC

Locus-to-gene feature dataset that serves as a template for generating each of the features that inform locus-to-gene assignments.

Source code in src/gentropy/dataset/l2g_features/l2g_feature.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@dataclass
class L2GFeature(Dataset, ABC):
    """Abstract template dataset from which every locus-to-gene (L2G) feature is derived.

    Any child class of L2GFeature must implement the `compute` method.
    """

    def __post_init__(
        self: L2GFeature,
        feature_dependency_type: Any = None,
        credible_set: StudyLocus | None = None,
    ) -> None:
        """Runs the parent post-initialisation and stores the feature inputs on the instance.

        Args:
            feature_dependency_type (Any): The dependency that the L2GFeature dataset depends on. Defaults to None.
            credible_set (StudyLocus | None): The credible set that the L2GFeature dataset is based on. Defaults to None.
        """
        # NOTE(review): a dataclass-generated `__init__` only forwards `InitVar`
        # fields to `__post_init__`; unless the parent declares matching ones,
        # both arguments presumably keep their `None` defaults when the object
        # is built through the constructor - confirm this is intended.
        super().__post_init__()
        self.feature_dependency_type = feature_dependency_type
        self.credible_set = credible_set

    @classmethod
    def get_schema(cls: type[L2GFeature]) -> StructType:
        """Returns the schema obtained by parsing `l2g_feature.json` with `parse_spark_schema`.

        Returns:
            StructType: Schema for the L2GFeature dataset
        """
        schema = parse_spark_schema("l2g_feature.json")
        return schema

    @classmethod
    @abstractmethod
    def compute(
        cls: type[L2GFeature],
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        feature_dependency: Any,
    ) -> L2GFeature:
        """Builds the feature dataset; the base implementation only raises.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
            feature_dependency (Any): The dependency that the L2GFeature class needs to compute the feature

        Returns:
            L2GFeature: a L2GFeature dataset

        Raises:
            NotImplementedError: This method must be implemented in the child classes
        """
        message = "Must be implemented in the child classes"
        raise NotImplementedError(message)

compute(study_loci_to_annotate: StudyLocus | L2GGoldStandard, feature_dependency: Any) -> L2GFeature abstractmethod classmethod

Computes the L2GFeature dataset.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
feature_dependency Any

The dependency that the L2GFeature class needs to compute the feature

required

Returns:

Type Description
L2GFeature

An L2GFeature dataset

Raises:

Type Description
NotImplementedError

This method must be implemented in the child classes

Source code in src/gentropy/dataset/l2g_features/l2g_feature.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
@abstractmethod
def compute(
    cls: type[L2GFeature],
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    feature_dependency: Any,
) -> L2GFeature:
    """Builds the feature dataset; the base implementation only raises.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
        feature_dependency (Any): The dependency that the L2GFeature class needs to compute the feature

    Returns:
        L2GFeature: a L2GFeature dataset

    Raises:
        NotImplementedError: This method must be implemented in the child classes
    """
    message = "Must be implemented in the child classes"
    raise NotImplementedError(message)

get_schema() -> StructType classmethod

Provides the schema for the L2GFeature dataset.

Returns:

Name Type Description
StructType StructType

Schema for the L2GFeature dataset

Source code in src/gentropy/dataset/l2g_features/l2g_feature.py
38
39
40
41
42
43
44
45
@classmethod
def get_schema(cls: type[L2GFeature]) -> StructType:
    """Returns the schema obtained by parsing `l2g_feature.json` with `parse_spark_schema`.

    Returns:
        StructType: Schema for the L2GFeature dataset
    """
    schema = parse_spark_schema("l2g_feature.json")
    return schema

Schema

root
 |-- studyLocusId: string (nullable = false)
 |-- geneId: string (nullable = false)
 |-- featureName: string (nullable = false)
 |-- featureValue: float (nullable = false)