Skip to content

VersionEngine

VersionEngine:

The version engine allows registering datasource-specific version seeker classes that retrieve the version of a datasource used as input to gentropy steps. It is currently implemented only for the GnomAD datasource.

This class can then be used to automate the versioning of output directories.

gentropy.common.version_engine.VersionEngine

Seek version from the datasource.

Source code in src/gentropy/common/version_engine.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class VersionEngine:
    """Seek version from the datasource.

    The datasource passed at construction time selects which registered
    version seeker is used by `seek` and `amend_version`.
    """

    def __init__(self, datasource: DataSourceType) -> None:
        """Initialize VersionEngine.

        Args:
            datasource (DataSourceType): datasource to seek the version from
        """
        self.datasource = datasource

    @staticmethod
    def version_seekers() -> dict[DataSourceType, DatasourceVersionSeeker]:
        """List version seekers.

        Returns:
            dict[DataSourceType, DatasourceVersionSeeker]: mapping of each registered datasource to its version seeker.
        """
        return {
            "gnomad": GnomADVersionSeeker(),
        }

    def seek(self, text: str | Path) -> str:
        """Interface for inferring the version from text by using the registered data source version inference method.

        Args:
            text (str | Path): text to seek version from

        Returns:
            str: inferred version

        Raises:
            TypeError: if text is neither a str nor a Path

        Examples:
            >>> VersionEngine("gnomad").seek("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
            '2.1.1'
        """
        # The type is validated before the seeker lookup, so an unsupported
        # input type is reported even when the datasource is unregistered.
        if not isinstance(text, (str, Path)):
            msg = f"Can not find version in {text}"
            raise TypeError(msg)
        infer_method = self._get_version_seek_method()
        return infer_method(str(text))

    def _get_version_seek_method(self) -> Callable[[str], str]:
        """Method that gets the version seeker for the datasource.

        Returns:
            Callable[[str], str]: Method to seek version based on the initialized datasource

        Raises:
            ValueError: if datasource is not registered in the list of version seekers
        """
        # Build the registry once; every call to version_seekers() creates
        # fresh seeker instances.
        seekers = self.version_seekers()
        if self.datasource not in seekers:
            raise ValueError(f"Invalid datasource {self.datasource}")
        return seekers[self.datasource].seek_version

    def amend_version(
        self, analysis_input_path: str | Path, analysis_output_path: str | Path
    ) -> str:
        """Amend version to the analysis output path if it is not already present.

        Path can be a gs:// path string or a Path object, absolute or relative.
        The analysis_input_path has to contain the version number.
        If the analysis_output_path already contains the version inferred from the
        input path, then it will not be appended. The version has to appear as a
        complete version token: `2.1` is not considered present in `12.1` or `2.1.1`.

        Args:
            analysis_input_path (str | Path): step input path
            analysis_output_path (str | Path): step output path

        Returns:
            str: Path with the amended version, does not return Path object!

        Examples:
            >>> VersionEngine("gnomad").amend_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz", "/some/path/without/version")
            '/some/path/without/version/2.1.1'
        """
        version = self.seek(analysis_input_path)
        output_path = str(analysis_output_path)
        # A plain substring test would treat "2.1" as present in "12.1" or
        # "2.1.1"; require that the match is not a fragment of a longer
        # dotted number on either side.
        version_token = rf"(?<!\d)(?<!\d\.){re.escape(version)}(?!\.?\d)"
        if re.search(version_token, output_path):
            return output_path
        separator = "" if output_path.endswith("/") else "/"
        return f"{output_path}{separator}{version}"

__init__(datasource: DataSourceType) -> None

Initialize VersionEngine.

Parameters:

Name Type Description Default
datasource DataSourceType

datasource to seek the version from

required
Source code in src/gentropy/common/version_engine.py
16
17
18
19
20
21
22
def __init__(self, datasource: DataSourceType) -> None:
    """Create an engine bound to a single datasource.

    Args:
        datasource (DataSourceType): datasource whose version will be sought
    """
    # Stored as-is; it is only checked against the registered seekers when
    # a version is actually requested.
    self.datasource = datasource

amend_version(analysis_input_path: str | Path, analysis_output_path: str | Path) -> str

Amend version to the analysis output path if it is not already present.

The path can be a gs:// path string or a Path object, absolute or relative. The analysis_input_path has to contain the version number. If the analysis_output_path already contains the version inferred from the input path, the version is not appended again.

Parameters:

Name Type Description Default
analysis_input_path str | Path

step input path

required
analysis_output_path str | Path

step output path

required

Returns:

Name Type Description
str str

Path with the amended version; note that a str is returned, not a Path object.

Examples:

>>> VersionEngine("gnomad").amend_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz", "/some/path/without/version")
'/some/path/without/version/2.1.1'
Source code in src/gentropy/common/version_engine.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def amend_version(
    self, analysis_input_path: str | Path, analysis_output_path: str | Path
) -> str:
    """Amend version to the analysis output path if it is not already present.

    Path can be path to g3:// or Path object, absolute or relative.
    The analysis_input_path has to contain the version number.
    If the analysis_output_path contains the same version as inferred from input version already,
    then it will not be appended.

    Args:
        analysis_input_path (str | Path): step input path
        analysis_output_path (str | Path): step output path

    Returns:
        str: Path with the ammended version, does not return Path object!

    Examples:
        >>> VersionEngine("gnomad").amend_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz", "/some/path/without/version")
        '/some/path/without/version/2.1.1'
    """
    version = self.seek(analysis_input_path)
    output_path = str(analysis_output_path)
    if version in output_path:
        return output_path
    if output_path.endswith("/"):
        return f"{analysis_output_path}{version}"
    return f"{analysis_output_path}/{version}"

seek(text: str | Path) -> str

Interface for inferring the version from text by using the registered data source version inference method.

Parameters:

Name Type Description Default
text str | Path

text to seek version from

required

Returns:

Name Type Description
str str

inferred version

Raises:

Type Description
TypeError

if text is neither a str nor a Path

Examples:

>>> VersionEngine("gnomad").seek("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
'2.1.1'
Source code in src/gentropy/common/version_engine.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def seek(self, text: str | Path) -> str:
    """Interface for inferring the version from text by using registered data source version iner method.

    Args:
        text (str | Path): text to seek version from

    Returns:
        str: inferred version

    Raises:
        TypeError: if version can not be found in the text

    Examples:
        >>> VersionEngine("gnomad").seek("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
        '2.1.1'
    """
    match text:
        case Path() | str():
            text = str(text)
        case _:
            msg = f"Can not find version in {text}"
            raise TypeError(msg)
    infer_method = self._get_version_seek_method()
    return infer_method(text)

version_seekers() -> dict[DataSourceType, DatasourceVersionSeeker] staticmethod

List version seekers.

Returns:

Type Description
dict[DataSourceType, DatasourceVersionSeeker]

dict[DataSourceType, DatasourceVersionSeeker]: mapping of each registered datasource to its version seeker.

Source code in src/gentropy/common/version_engine.py
24
25
26
27
28
29
30
31
32
33
@staticmethod
def version_seekers() -> dict[DataSourceType, DatasourceVersionSeeker]:
    """Build the registry of datasource version seekers.

    Returns:
        dict[DataSourceType, DatasourceVersionSeeker]: mapping of each registered datasource to its version seeker.
    """
    # A new registry (and new seeker instances) is created on every call.
    registry: dict[DataSourceType, DatasourceVersionSeeker] = {}
    registry["gnomad"] = GnomADVersionSeeker()
    return registry

gentropy.common.version_engine.GnomADVersionSeeker

Bases: DatasourceVersionSeeker

Seek version from GnomAD datasource.

Source code in src/gentropy/common/version_engine.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
class GnomADVersionSeeker(DatasourceVersionSeeker):
    """Seek version from GnomAD datasource."""

    @staticmethod
    def seek_version(text: str) -> str:
        """Seek GnomAD version from provided text by using regex.

        The version consists of two or three dot-separated numeric components;
        the first such occurrence in the text is returned.
        Historically gnomAD version numbers have been in the format
        2.1.1, 3.1, etc. as of 2024-05. GnomAD versions can be found by
        listing `"gs://gcp-public-data--gnomad/release/*/*/*"`

        Args:
            text (str): text to seek version from

        Raises:
            ValueError: if no version can be found in the text

        Returns:
            str: version found in the text

        Examples:
            >>> GnomADVersionSeeker.seek_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
            '2.1.1'
            >>> GnomADVersionSeeker.seek_version("gnomad.genomes.v3.1.sites.vcf.bgz")
            '3.1'
        """
        # The third component is optional only together with its dot, so a
        # two-part version followed by "." (e.g. "v3.1.sites") does not
        # capture the trailing dot.
        result = re.search(r"(\d+\.\d+(?:\.\d+)?)", text)
        if result is None:
            raise ValueError(f"No GnomAD version found in provided text: {text}")
        return result.group(1)

seek_version(text: str) -> str staticmethod

Seek GnomAD version from provided text by using regex.

The version number consists of two or three dot-separated numeric components. Historically (as of 2024-05) gnomAD version numbers have been in the format 2.1.1, 3.1, etc. GnomAD versions can be found by listing "gs://gcp-public-data--gnomad/release/*/*/*"

Parameters:

Name Type Description Default
text str

text to seek version from

required

Raises:

Type Description
ValueError

if no version can be found in the text

Returns:

Name Type Description
str str

version found in the text

Examples:

>>> GnomADVersionSeeker.seek_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
'2.1.1'
Source code in src/gentropy/common/version_engine.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
@staticmethod
def seek_version(text: str) -> str:
    """Seek GnomAD version from provided text by using regex.

    Up to 3 digits are allowed in the version number.
    Historically gnomAD version numbers have been in the format
    2.1.1, 3.1, etc. as of 2024-05. GnomAD versions can be found by
    running `"gs://gcp-public-data--gnomad/release/*/*/*"`

    Args:
        text (str): text to seek version from

    Raises:
        ValueError: if version can not be seeked

    Returns:
        str: seeked version

    Examples:
        >>> GnomADVersionSeeker.seek_version("gs://gcp-public-data--gnomad/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.vcf.bgz")
        '2.1.1'
    """
    result = re.search(r"v?((\d+){1}\.(\d+){1}\.?(\d+)?)", text)
    match result:
        case None:
            raise ValueError(f"No GnomAD version found in provided text: {text}")
        case _:
            return result.group(1)