L2G Model

gentropy.method.l2g.model.LocusToGeneModel dataclass

Wrapper for the Locus to Gene classifier.
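
A minimal instantiation sketch (assuming gentropy is installed; the feature names and hyperparameter overrides below are illustrative, not recommended settings):

from gentropy.method.l2g.model import LocusToGeneModel

# Override a subset of the default hyperparameters; __post_init__ applies them
# to the underlying GradientBoostingClassifier via set_params()
l2g_model = LocusToGeneModel(
    features_list=["distanceTssMean", "eQtlColocClppMaximum"],  # illustrative names
    hyperparameters={"n_estimators": 100, "max_depth": 5, "learning_rate": 0.1},
)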

Source code in src/gentropy/method/l2g/model.py
@dataclass
class LocusToGeneModel:
    """Wrapper for the Locus to Gene classifier."""

    model: Any = GradientBoostingClassifier(random_state=42)
    features_list: list[str] = field(default_factory=list)
    hyperparameters: dict[str, Any] = field(
        default_factory=lambda: {
            "n_estimators": 100,
            "max_depth": 10,
            "ccp_alpha": 0,
            "learning_rate": 0.1,
            "min_samples_leaf": 5,
            "min_samples_split": 5,
            "subsample": 1,
        }
    )
    training_data: L2GFeatureMatrix | None = None
    label_encoder: dict[str, int] = field(
        default_factory=lambda: {
            "negative": 0,
            "positive": 1,
        }
    )

    def __post_init__(self: LocusToGeneModel) -> None:
        """Post-initialisation to fit the estimator with the provided params."""
        self.model.set_params(**self.hyperparameters_dict)

    @classmethod
    def load_from_disk(
        cls: type[LocusToGeneModel],
        session: Session,
        path: str,
        model_name: str = "classifier.skops",
        **kwargs: Any,
    ) -> LocusToGeneModel:
        """Load a fitted model from disk.

        Args:
            session (Session): Session object that loads the training data
            path (str): Path to the directory containing model and metadata
            model_name (str): Name of the persisted model to load. Defaults to "classifier.skops".
            **kwargs(Any): Keyword arguments to pass to the constructor

        Returns:
            LocusToGeneModel: L2G model loaded from disk

        Raises:
            ValueError: If the model has not been fitted yet
        """
        model_path = (Path(path) / model_name).as_posix()
        # Default to None so training_data is always bound, even on the GCS branch
        # where the training data is not loaded
        training_data = None
        if model_path.startswith("gs://"):
            path = model_path.removeprefix("gs://")
            bucket_name = path.split("/")[0]
            blob_name = "/".join(path.split("/")[1:])
            from google.cloud import storage

            client = storage.Client()
            bucket = storage.Bucket(client=client, name=bucket_name)
            blob = storage.Blob(name=blob_name, bucket=bucket)
            data = blob.download_as_string(client=client)
            loaded_model = sio.loads(data, trusted=sio.get_untrusted_types(data=data))
        else:
            loaded_model = sio.load(
                model_path, trusted=sio.get_untrusted_types(file=model_path)
            )
            try:
                # Try loading the training data if it is in the model directory
                training_data = L2GFeatureMatrix(
                    _df=session.spark.createDataFrame(
                        # Parquet is read with Pandas to easily read local files
                        pd.read_parquet(
                            (Path(path) / "training_data.parquet").as_posix()
                        )
                    ),
                    features_list=kwargs.get("features_list"),
                )
            except Exception as e:
                logging.error("Training data set to none. Error: %s", e)
                training_data = None

        if not loaded_model._is_fitted():
            raise ValueError("Model has not been fitted yet.")
        return cls(model=loaded_model, training_data=training_data, **kwargs)

    @classmethod
    def load_from_hub(
        cls: type[LocusToGeneModel],
        session: Session,
        hf_model_id: str,
        hf_model_version: str | None = None,
        hf_token: str | None = None,
    ) -> LocusToGeneModel:
        """Load a model from the Hugging Face Hub. This will download the model from the hub and load it from disk.

        Args:
            session (Session): Session object to load the training data
            hf_model_id (str): Model ID on the Hugging Face Hub
            hf_model_version (str | None): Tag, branch, or commit hash to download the model from the Hub. If None, the latest commit is downloaded.
            hf_token (str | None): Hugging Face Hub token to download the model (only required if private)

        Returns:
            LocusToGeneModel: L2G model loaded from the Hugging Face Hub
        """

        def get_features_list_from_metadata() -> list[str]:
            """Get the features list (in the right order) from the metadata JSON file downloaded from the Hub.

            Returns:
                list[str]: Features list
            """
            import json

            model_config_path = str(Path(local_path) / "config.json")
            with open(model_config_path) as f:
                model_config = json.load(f)
            return [
                column
                for column in model_config["sklearn"]["columns"]
                if column
                not in [
                    "studyLocusId",
                    "geneId",
                    "traitFromSourceMappedId",
                    "goldStandardSet",
                ]
            ]

        local_path = hf_model_id
        hub_utils.download(
            repo_id=hf_model_id,
            dst=local_path,
            token=hf_token,
            revision=hf_model_version,
        )
        features_list = get_features_list_from_metadata()
        return cls.load_from_disk(
            session,
            local_path,
            features_list=features_list,
        )

    @property
    def hyperparameters_dict(self) -> dict[str, Any]:
        """Return hyperparameters as a dictionary.

        Returns:
            dict[str, Any]: Hyperparameters

        Raises:
            ValueError: If hyperparameters have not been set
        """
        if not self.hyperparameters:
            raise ValueError("Hyperparameters have not been set.")
        elif isinstance(self.hyperparameters, dict):
            return self.hyperparameters
        return self.hyperparameters.default_factory()

    def predict(
        self: LocusToGeneModel,
        feature_matrix: L2GFeatureMatrix,
        session: Session,
    ) -> L2GPrediction:
        """Apply the model to a given feature matrix dataframe. The feature matrix needs to be preprocessed first.

        Args:
            feature_matrix (L2GFeatureMatrix): Feature matrix to apply the model to.
            session (Session): Session object to convert data to Spark

        Returns:
            L2GPrediction: Dataset containing credible sets and their L2G scores
        """
        from gentropy.dataset.l2g_prediction import L2GPrediction

        # pandas 2.0 removed DataFrame.iteritems, which Spark's pandas conversion
        # may still call; alias it to .items for compatibility
        pd_dataframe.iteritems = pd_dataframe.items

        feature_matrix_pdf = feature_matrix._df.toPandas()
        # L2G score is the probability the classifier assigns to the positive class (the second element in the probability array)
        feature_matrix_pdf["score"] = self.model.predict_proba(
            # We drop the fixed columns to only pass the feature values to the classifier
            feature_matrix_pdf.drop(feature_matrix.fixed_cols, axis=1)
            .apply(pd_to_numeric)
            .values
        )[:, 1]
        output_cols = [field.name for field in L2GPrediction.get_schema().fields]
        return L2GPrediction(
            _df=session.spark.createDataFrame(feature_matrix_pdf.filter(output_cols)),
            _schema=L2GPrediction.get_schema(),
            model=self,
        )

    def save(self: LocusToGeneModel, path: str) -> None:
        """Saves fitted model to disk using the skops persistence format.

        Args:
            path (str): Path to save the persisted model. Should end with .skops

        Raises:
            ValueError: If the model has not been fitted yet or if the path does not end with .skops
        """
        if self.model is None:
            raise ValueError("Model has not been fitted yet.")
        if not path.endswith(".skops"):
            raise ValueError("Path should end with .skops")
        if path.startswith("gs://"):
            local_path = path.split("/")[-1]
            sio.dump(self.model, local_path)
            copy_to_gcs(local_path, path)
        else:
            # create directory if path does not exist
            Path(path).parent.mkdir(parents=True, exist_ok=True)
            sio.dump(self.model, path)

    @staticmethod
    def load_feature_matrix_from_wandb(wandb_run_name: str) -> pd.DataFrame:
        """Loads dataset of feature matrix used during a wandb run.

        Args:
            wandb_run_name (str): Name of the wandb run to load the feature matrix from

        Returns:
            pd.DataFrame: Feature matrix used during the wandb run
        """
        with open(wandb_run_name) as f:
            raw_data = json.load(f)

        data = raw_data["data"]
        columns = raw_data["columns"]
        return pd.DataFrame(data, columns=columns)

    def _create_hugging_face_model_card(
        self: LocusToGeneModel,
        local_repo: str,
    ) -> None:
        """Create a model card to document the model in the hub. The model card is saved in the local repo before pushing it to the hub.

        Args:
            local_repo (str): Path to the folder where the README file will be saved to be pushed to the Hugging Face Hub
        """
        from skops import card

        # Define card metadata
        description = """The locus-to-gene (L2G) model derives features to prioritise likely causal genes at each GWAS locus based on genetic and functional genomics features. The main categories of predictive features are:

        - Distance (from credible set variants to gene)
        - Molecular QTL Colocalization
        - Variant Pathogenicity (from VEP)

        More information at: https://opentargets.github.io/gentropy/python_api/methods/l2g/_l2g/
        """
        how_to = """To use the model, you can load it using the `LocusToGeneModel.load_from_hub` method. This will return a `LocusToGeneModel` object that can be used to make predictions on a feature matrix.
        The model can then be used to make predictions using the `predict` method.

        More information can be found at: https://opentargets.github.io/gentropy/python_api/methods/l2g/model/
        """
        model_card = card.Card(
            self.model,
            metadata=card.metadata_from_config(Path(local_repo)),
        )
        model_card.add(
            **{
                "Model description": description,
                "Model description/Training Procedure": "Gradient Boosting Classifier",
                "How to Get Started with the Model": how_to,
                "Model Card Authors": "Open Targets",
                "License": "MIT",
                "Citation": "https://doi.org/10.1038/s41588-021-00945-5",
            }
        )
        model_card.delete("Model description/Training Procedure/Model Plot")
        model_card.delete("Model description/Evaluation Results")
        model_card.delete("Model Card Authors")
        model_card.delete("Model Card Contact")
        model_card.save(Path(local_repo) / "README.md")

    def export_to_hugging_face_hub(
        self: LocusToGeneModel,
        model_path: str,
        hf_hub_token: str,
        data: pd_dataframe,
        commit_message: str,
        repo_id: str = "opentargets/locus_to_gene",
        local_repo: str = "locus_to_gene",
    ) -> None:
        """Share the model and training dataset on Hugging Face Hub.

        Args:
            model_path (str): The path to the L2G model file.
            hf_hub_token (str): Hugging Face Hub token
            data (pd_dataframe): Data used to train the model. This is used to have an example input for the model and to store the column order.
            commit_message (str): Commit message for the push
            repo_id (str): The Hugging Face Hub repo id where the model will be stored.
            local_repo (str): Path to the folder where the contents of the model repo + the documentation are located. This is used to push the model to the Hugging Face Hub.

        Raises:
            RuntimeError: If the push to the Hugging Face Hub fails
        """
        from sklearn import __version__ as sklearn_version

        try:
            hub_utils.init(
                model=model_path,
                requirements=[f"scikit-learn={sklearn_version}"],
                dst=local_repo,
                task="tabular-classification",
                data=data,
            )
            self._create_hugging_face_model_card(local_repo)
            data.to_parquet(f"{local_repo}/training_data.parquet")
            hub_utils.push(
                repo_id=repo_id,
                source=local_repo,
                token=hf_hub_token,
                commit_message=commit_message,
                create_remote=True,
            )
        except Exception as e:
            # remove the local repo if the push fails
            if Path(local_repo).exists():
                for p in Path(local_repo).glob("*"):
                    p.unlink()
                Path(local_repo).rmdir()
            raise RuntimeError("Failed to push the L2G model to the Hugging Face Hub.") from e

hyperparameters_dict: dict[str, Any] property

Return hyperparameters as a dictionary.

Returns:

| Type | Description |
| --- | --- |
| dict[str, Any] | Hyperparameters |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If hyperparameters have not been set |
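
As a sketch, the property can be used to check what __post_init__ pushed into the estimator (with the default hyperparameters, max_depth is 10):

l2g_model = LocusToGeneModel()
print(l2g_model.hyperparameters_dict["max_depth"])  # 10
print(l2g_model.model.get_params()["max_depth"])    # 10, set via set_params()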

export_to_hugging_face_hub(model_path: str, hf_hub_token: str, data: pd_dataframe, commit_message: str, repo_id: str = 'opentargets/locus_to_gene', local_repo: str = 'locus_to_gene') -> None

Share the model and training dataset on Hugging Face Hub.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_path | str | The path to the L2G model file. | required |
| hf_hub_token | str | Hugging Face Hub token | required |
| data | DataFrame | Data used to train the model. This is used to have an example input for the model and to store the column order. | required |
| commit_message | str | Commit message for the push | required |
| repo_id | str | The Hugging Face Hub repo id where the model will be stored. | 'opentargets/locus_to_gene' |
| local_repo | str | Path to the folder where the contents of the model repo + the documentation are located. This is used to push the model to the Hugging Face Hub. | 'locus_to_gene' |

Raises:

| Type | Description |
| --- | --- |
| RuntimeError | If the push to the Hugging Face Hub fails |
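
A hedged sketch of a push to the Hub; the token, paths, and dataframe below are placeholders rather than real values:

import pandas as pd

# Hypothetical training matrix; in practice this is the L2G training data
training_df = pd.DataFrame(
    {"distanceTssMean": [0.1, 0.9], "goldStandardSet": ["negative", "positive"]}
)

l2g_model.export_to_hugging_face_hub(
    model_path="models/classifier.skops",  # placeholder path to the persisted model
    hf_hub_token="hf_xxx",                 # placeholder token with write access
    data=training_df,
    commit_message="Update L2G model",
    # repo_id and local_repo keep their defaults:
    # "opentargets/locus_to_gene" and "locus_to_gene"
)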

load_feature_matrix_from_wandb(wandb_run_name: str) -> pd.DataFrame staticmethod

Loads the feature matrix dataset used during a wandb run.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| wandb_run_name | str | Path to the JSON table file exported for the wandb run | required |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | Feature matrix used during the wandb run |
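
For example (a sketch; the file path below is a hypothetical export of the run's feature-matrix table):

feature_matrix_df = LocusToGeneModel.load_feature_matrix_from_wandb(
    "wandb/run-20240101_000000-abcd1234/files/feature_matrix.table.json"
)
print(feature_matrix_df.columns.tolist())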

load_from_disk(session: Session, path: str, model_name: str = 'classifier.skops', **kwargs: Any) -> LocusToGeneModel classmethod

Load a fitted model from disk.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| session | Session | Session object that loads the training data | required |
| path | str | Path to the directory containing model and metadata | required |
| model_name | str | Name of the persisted model to load. Defaults to "classifier.skops". | 'classifier.skops' |
| **kwargs | Any | Keyword arguments to pass to the constructor | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| LocusToGeneModel | LocusToGeneModel | L2G model loaded from disk |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If the model has not been fitted yet |
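
A minimal sketch, assuming a local model directory and that Session is importable from gentropy.common.session with defaults that start a local Spark session:

from gentropy.common.session import Session

session = Session()
l2g_model = LocusToGeneModel.load_from_disk(
    session,
    path="models/l2g",  # directory holding classifier.skops and, optionally, training_data.parquet
    features_list=["distanceTssMean"],  # forwarded to the constructor via **kwargs
)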

load_from_hub(session: Session, hf_model_id: str, hf_model_version: str | None = None, hf_token: str | None = None) -> LocusToGeneModel classmethod

Load a model from the Hugging Face Hub. This will download the model from the hub and load it from disk.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| session | Session | Session object to load the training data | required |
| hf_model_id | str | Model ID on the Hugging Face Hub | required |
| hf_model_version | str or None | Tag, branch, or commit hash to download the model from the Hub. If None, the latest commit is downloaded. | None |
| hf_token | str or None | Hugging Face Hub token to download the model (only required if private) | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| LocusToGeneModel | LocusToGeneModel | L2G model loaded from the Hugging Face Hub |
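
A sketch using the default Open Targets repository (the session object is assumed to exist, as in the load_from_disk example):

l2g_model = LocusToGeneModel.load_from_hub(
    session,
    hf_model_id="opentargets/locus_to_gene",
    hf_model_version=None,  # None downloads the latest commit
    hf_token=None,          # only needed for private repositories
)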

predict(feature_matrix: L2GFeatureMatrix, session: Session) -> L2GPrediction

Apply the model to a given feature matrix dataframe. The feature matrix needs to be preprocessed first.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| feature_matrix | L2GFeatureMatrix | Feature matrix to apply the model to. | required |
| session | Session | Session object to convert data to Spark | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| L2GPrediction | L2GPrediction | Dataset containing credible sets and their L2G scores |
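
A sketch, assuming feature_matrix is an already preprocessed L2GFeatureMatrix:

predictions = l2g_model.predict(feature_matrix, session)
# L2GPrediction is a gentropy Dataset, so the underlying Spark dataframe
# is exposed through its df property
predictions.df.show()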

save(path: str) -> None

Saves fitted model to disk using the skops persistence format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| path | str | Path to save the persisted model. Should end with .skops | required |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If the model has not been fitted yet or if the path does not end with .skops |
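
For example (the paths below are placeholders):

# Local save; missing parent directories are created first
l2g_model.save("models/l2g/classifier.skops")

# A gs:// path is written locally and then copied to GCS via copy_to_gcs
l2g_model.save("gs://my-bucket/l2g/classifier.skops")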
