Skip to content

L2G Feature Factory

gentropy.method.l2g.feature_factory.FeatureFactory

Factory class for creating features.

Source code in src/gentropy/method/l2g/feature_factory.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
class FeatureFactory:
    """Factory that computes L2G features for a dataset of study loci.

    Feature names are resolved through `feature_mapper`. `compute_feature` reads the
    `feature_dependency_type` attribute of the mapped class and calls its `compute`.
    """

    # Registry of supported feature names and the class that computes each one.
    feature_mapper: Mapping[str, type[L2GFeature]] = {
        "distanceSentinelTss": DistanceSentinelTssFeature,
        "distanceSentinelTssNeighbourhood": DistanceSentinelTssNeighbourhoodFeature,
        "distanceSentinelFootprint": DistanceSentinelFootprintFeature,
        "distanceSentinelFootprintNeighbourhood": DistanceSentinelFootprintNeighbourhoodFeature,
        "distanceTssMean": DistanceTssMeanFeature,
        "distanceTssMeanNeighbourhood": DistanceTssMeanNeighbourhoodFeature,
        "distanceFootprintMean": DistanceFootprintMeanFeature,
        "distanceFootprintMeanNeighbourhood": DistanceFootprintMeanNeighbourhoodFeature,
        "eQtlColocClppMaximum": EQtlColocClppMaximumFeature,
        "eQtlColocClppMaximumNeighbourhood": EQtlColocClppMaximumNeighbourhoodFeature,
        "pQtlColocClppMaximum": PQtlColocClppMaximumFeature,
        "pQtlColocClppMaximumNeighbourhood": PQtlColocClppMaximumNeighbourhoodFeature,
        "sQtlColocClppMaximum": SQtlColocClppMaximumFeature,
        "sQtlColocClppMaximumNeighbourhood": SQtlColocClppMaximumNeighbourhoodFeature,
        "eQtlColocH4Maximum": EQtlColocH4MaximumFeature,
        "eQtlColocH4MaximumNeighbourhood": EQtlColocH4MaximumNeighbourhoodFeature,
        "pQtlColocH4Maximum": PQtlColocH4MaximumFeature,
        "pQtlColocH4MaximumNeighbourhood": PQtlColocH4MaximumNeighbourhoodFeature,
        "sQtlColocH4Maximum": SQtlColocH4MaximumFeature,
        "sQtlColocH4MaximumNeighbourhood": SQtlColocH4MaximumNeighbourhoodFeature,
        "vepMean": VepMeanFeature,
        "vepMeanNeighbourhood": VepMeanNeighbourhoodFeature,
        "vepMaximum": VepMaximumFeature,
        "vepMaximumNeighbourhood": VepMaximumNeighbourhoodFeature,
        "geneCount500kb": GeneCountFeature,
        "proteinGeneCount500kb": ProteinGeneCountFeature,
        "isProteinCoding": ProteinCodingFeature,
        "credibleSetConfidence": CredibleSetConfidenceFeature,
    }

    def __init__(
        self: FeatureFactory,
        study_loci_to_annotate: StudyLocus | L2GGoldStandard,
        features_list: list[str],
    ) -> None:
        """Stores the dataset to annotate and the names of the features to compute.

        Args:
            study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
            features_list (list[str]): names of the features to compute.
        """
        self.features_list = features_list
        self.study_loci_to_annotate = study_loci_to_annotate

    def generate_features(
        self: FeatureFactory,
        features_input_loader: L2GFeatureInputLoader,
    ) -> list[L2GFeature]:
        """Computes every feature named in `features_list`, in order.

        All requested names are validated before any feature is computed, so an
        unknown name is reported without first doing work for the names that precede it.

        Args:
            features_input_loader (L2GFeatureInputLoader): object with required features dependencies.

        Returns:
            list[L2GFeature]: list of computed features.

        Raises:
            ValueError: If feature not found.
        """
        # Snapshot the names so validation and computation see the same sequence.
        requested_features = list(self.features_list)
        for feature in requested_features:
            if feature not in self.feature_mapper:
                raise ValueError(f"Feature {feature} not found.")
        return [
            self.compute_feature(feature, features_input_loader)
            for feature in requested_features
        ]

    def compute_feature(
        self: FeatureFactory,
        feature_name: str,
        features_input_loader: L2GFeatureInputLoader,
    ) -> L2GFeature:
        """Computes a single feature by delegating to the class mapped to its name.

        Args:
            feature_name (str): name of the feature
            features_input_loader (L2GFeatureInputLoader): Object that contains the feature inputs.

        Returns:
            L2GFeature: the value returned by the mapped class's `compute`.

        Raises:
            KeyError: If `feature_name` is not a key of `feature_mapper`.
        """
        feature_cls = self.feature_mapper[feature_name]
        # Hand the feature only the inputs whose type it declares as a dependency.
        feature_dependency = features_input_loader.get_dependency_by_type(
            feature_cls.feature_dependency_type
        )
        return feature_cls.compute(
            study_loci_to_annotate=self.study_loci_to_annotate,
            feature_dependency=feature_dependency,
        )

__init__(study_loci_to_annotate: StudyLocus | L2GGoldStandard, features_list: list[str]) -> None

Initializes the factory.

Parameters:

Name Type Description Default
study_loci_to_annotate StudyLocus | L2GGoldStandard

The dataset containing study loci that will be used for annotation

required
features_list list[str]

list of features to compute.

required
Source code in src/gentropy/method/l2g/feature_factory.py
135
136
137
138
139
140
141
142
143
144
145
146
147
def __init__(
    self: FeatureFactory,
    study_loci_to_annotate: StudyLocus | L2GGoldStandard,
    features_list: list[str],
) -> None:
    """Stores the dataset to annotate and the names of the features to compute.

    Args:
        study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
        features_list (list[str]): names of the features to compute.
    """
    self.features_list = features_list
    self.study_loci_to_annotate = study_loci_to_annotate

compute_feature(feature_name: str, features_input_loader: L2GFeatureInputLoader) -> L2GFeature

Computes a single feature by delegating to the feature class mapped to its name.

Parameters:

Name Type Description Default
feature_name str

name of the feature

required
features_input_loader L2GFeatureInputLoader

Object that contains the feature inputs.

required

Returns:

Name Type Description
L2GFeature L2GFeature

instantiated feature object

Source code in src/gentropy/method/l2g/feature_factory.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def compute_feature(
    self: FeatureFactory,
    feature_name: str,
    features_input_loader: L2GFeatureInputLoader,
) -> L2GFeature:
    """Computes a single feature by delegating to the class mapped to its name.

    Args:
        feature_name (str): name of the feature
        features_input_loader (L2GFeatureInputLoader): Object that contains the feature inputs.

    Returns:
        L2GFeature: the value returned by the mapped class's `compute`.
    """
    feature_cls = self.feature_mapper[feature_name]
    # Hand the feature only the inputs whose type it declares as a dependency.
    feature_dependency = features_input_loader.get_dependency_by_type(
        feature_cls.feature_dependency_type
    )
    return feature_cls.compute(
        study_loci_to_annotate=self.study_loci_to_annotate,
        feature_dependency=feature_dependency,
    )

generate_features(features_input_loader: L2GFeatureInputLoader) -> list[L2GFeature]

Computes every feature named in the features list, using the input loader to supply each feature's dependencies, and returns them as a list.

Parameters:

Name Type Description Default
features_input_loader L2GFeatureInputLoader

object with required features dependencies.

required

Returns:

Type Description
list[L2GFeature]

list[L2GFeature]: list of computed features.

Raises:

Type Description
ValueError

If feature not found.

Source code in src/gentropy/method/l2g/feature_factory.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def generate_features(
    self: FeatureFactory,
    features_input_loader: L2GFeatureInputLoader,
) -> list[L2GFeature]:
    """Computes every feature named in `features_list`, in order.

    All requested names are validated before any feature is computed, so an
    unknown name is reported without first doing work for the names that precede it.

    Args:
        features_input_loader (L2GFeatureInputLoader): object with required features dependencies.

    Returns:
        list[L2GFeature]: list of computed features.

    Raises:
        ValueError: If feature not found.
    """
    # Snapshot the names so validation and computation see the same sequence.
    requested_features = list(self.features_list)
    for feature in requested_features:
        if feature not in self.feature_mapper:
            raise ValueError(f"Feature {feature} not found.")
    return [
        self.compute_feature(feature, features_input_loader)
        for feature in requested_features
    ]

gentropy.method.l2g.feature_factory.L2GFeatureInputLoader

Loads all input datasets required for the L2GFeature dataset.

Source code in src/gentropy/method/l2g/feature_factory.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class L2GFeatureInputLoader:
    """Loads all input datasets required for the L2GFeature dataset."""

    def __init__(
        self,
        **kwargs: Any,
    ) -> None:
        """Initializes L2GFeatureInputLoader with the provided inputs and returns loaded dependencies as a dictionary.

        Args:
            **kwargs (Any): keyword arguments with the name of the dependency and the dependency itself.
        """
        self.input_dependencies = {k: v for k, v in kwargs.items() if v is not None}

    def get_dependency_by_type(
        self, dependency_type: list[Any] | Any
    ) -> dict[str, Any]:
        """Returns the dependency that matches the provided type.

        Args:
            dependency_type (list[Any] | Any): type(s) of the dependency to return.

        Returns:
            dict[str, Any]: dictionary of dependenci(es) that match the provided type(s).
        """
        if not isinstance(dependency_type, list):
            dependency_type = [dependency_type]
        return {
            k: v
            for k, v in self.input_dependencies.items()
            if isinstance(v, tuple(dependency_type))
        }

    def __iter__(self) -> Iterator[tuple[str, Any]]:
        """Make the class iterable, returning an iterator over key-value pairs.

        Returns:
            Iterator[tuple[str, Any]]: iterator over the dictionary's key-value pairs.
        """
        return iter(self.input_dependencies.items())

    def __repr__(self) -> str:
        """Return a string representation of the input dependencies.

        Useful for understanding the loader content without having to print the object attribute.

        Returns:
            str: string representation of the input dependencies.
        """
        return repr(self.input_dependencies)

__init__(**kwargs: Any) -> None

Initializes L2GFeatureInputLoader with the provided inputs and stores those that are not None as a dictionary of dependencies.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments with the name of the dependency and the dependency itself.

{}
Source code in src/gentropy/method/l2g/feature_factory.py
52
53
54
55
56
57
58
59
60
61
def __init__(
    self,
    **kwargs: Any,
) -> None:
    """Stores every provided dependency whose value is not None.

    Args:
        **kwargs (Any): dependencies keyed by name; entries whose value is None are dropped.
    """
    loaded: dict[str, Any] = {}
    for name, dependency in kwargs.items():
        if dependency is not None:
            loaded[name] = dependency
    self.input_dependencies = loaded

get_dependency_by_type(dependency_type: list[Any] | Any) -> dict[str, Any]

Returns the dependency that matches the provided type.

Parameters:

Name Type Description Default
dependency_type list[Any] | Any

type(s) of the dependency to return.

required

Returns:

Type Description
dict[str, Any]

dict[str, Any]: dictionary of the dependencies that match the provided type(s).

Source code in src/gentropy/method/l2g/feature_factory.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def get_dependency_by_type(
    self, dependency_type: list[Any] | Any
) -> dict[str, Any]:
    """Returns the dependency that matches the provided type.

    Args:
        dependency_type (list[Any] | Any): type(s) of the dependency to return.

    Returns:
        dict[str, Any]: dictionary of dependenci(es) that match the provided type(s).
    """
    if not isinstance(dependency_type, list):
        dependency_type = [dependency_type]
    return {
        k: v
        for k, v in self.input_dependencies.items()
        if isinstance(v, tuple(dependency_type))
    }