Skip to content

Genomic Region

gentropy.common.genomic_region.KnownGenomicRegions

Bases: Enum

Known genomic regions in the human genome in string format.

Source code in src/gentropy/common/genomic_region.py
6
7
8
9
class KnownGenomicRegions(Enum):
    """Named regions of the human genome, each stored as a ``chr:start-end`` string."""

    # MHC locus on chromosome 6 (presumably the major histocompatibility
    # complex; the genome build these coordinates refer to is not stated here).
    MHC = "chr6:25726063-33400556"

gentropy.common.genomic_region.GenomicRegion

Genomic regions of interest.

Attributes:

Name Type Description
chromosome str

Chromosome.

start int

Start position.

end int

End position.
Source code in src/gentropy/common/genomic_region.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
class GenomicRegion:
    """Genomic regions of interest.

    Attributes:
        chromosome (str): Chromosome.
        start (int): Start position.
        end (int): End position.
    """

    def __init__(self, chromosome: str, start: int, end: int) -> None:
        """Class constructor.

        Args:
            chromosome (str): Chromosome.
            start (int): Start position.
            end (int): End position.
        """
        self.chromosome = chromosome
        self.start = start
        self.end = end

    def __str__(self) -> str:
        """String representation of the genomic region.

        Returns:
            str: Genomic region in chr:start-end format.
        """
        return f"{self.chromosome}:{self.start}-{self.end}"

    @classmethod
    def from_string(cls: type["GenomicRegion"], region: str) -> "GenomicRegion":
        """Parse region string to chr:start-end.

        Thousands separators (``,``) are dropped and a single leading ``chr``
        prefix is stripped from the chromosome name.

        Args:
            region (str): Genomic region expected to follow chr##:#,###-#,### format or ##:####-#####.

        Returns:
            GenomicRegion: Genomic region object.

        Raises:
            ValueError: If the region does not split into exactly three components (chromosome, start, end), or if the start or end position cannot be cast to an integer.

        Examples:
            >>> print(GenomicRegion.from_string('chr6:28,510,120-33,480,577'))
            6:28510120-33480577
            >>> print(GenomicRegion.from_string('6:28510120-33480577'))
            6:28510120-33480577
            >>> print(GenomicRegion.from_string('6:28510120'))
            Traceback (most recent call last):
                ...
            ValueError: Genomic region should follow a ##:####-#### format.
            >>> print(GenomicRegion.from_string('6:28510120-foo'))
            Traceback (most recent call last):
                ...
            ValueError: Start and the end position of the region has to be integer.
        """
        # Treat ":" and "-" as the same separator and drop thousands commas, so
        # a well-formed region always splits into exactly three fields.
        fields = region.replace(",", "").replace(":", "-").split("-")
        if len(fields) != 3:
            raise ValueError("Genomic region should follow a ##:####-#### format.")
        chromosome, start_position, end_position = fields

        # Only the integer conversions are guarded, so the error message below
        # cannot mask an unrelated ValueError raised by the constructor.
        try:
            start = int(start_position)
            end = int(end_position)
        except ValueError as err:
            raise ValueError(
                "Start and the end position of the region has to be integer."
            ) from err

        # Strip "chr" only when it is a prefix; a blanket str.replace would
        # also remove any later "chr" inside the contig name.
        if chromosome.startswith("chr"):
            chromosome = chromosome[len("chr") :]

        return cls(chromosome=chromosome, start=start, end=end)

    @classmethod
    def from_known_genomic_region(
        cls: type["GenomicRegion"], region: KnownGenomicRegions
    ) -> "GenomicRegion":
        """Get known genomic region.

        Args:
            region (KnownGenomicRegions): Known genomic region.

        Returns:
            GenomicRegion: Genomic region object.

        Examples:
            >>> print(GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC))
            6:25726063-33400556
        """
        # Dispatch through cls so that a subclass gets an instance of itself.
        return cls.from_string(region.value)

__init__(chromosome: str, start: int, end: int) -> None

Class constructor.

Parameters:

Name Type Description Default
chromosome str

Chromosome.

required
start int

Start position.

required
end int

End position.

required
Source code in src/gentropy/common/genomic_region.py
21
22
23
24
25
26
27
28
29
30
31
def __init__(self, chromosome: str, start: int, end: int) -> None:
    """Store the coordinates that define the region on the instance.

    Args:
        chromosome (str): Chromosome.
        start (int): Start position.
        end (int): End position.
    """
    self.chromosome, self.start, self.end = chromosome, start, end

from_known_genomic_region(region: KnownGenomicRegions) -> GenomicRegion classmethod

Get known genomic region.

Parameters:

Name Type Description Default
region KnownGenomicRegions

Known genomic region.

required

Returns:

Name Type Description
GenomicRegion GenomicRegion

Genomic region object.

Examples:

>>> print(GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC))
6:25726063-33400556
Source code in src/gentropy/common/genomic_region.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@classmethod
def from_known_genomic_region(
    cls: type["GenomicRegion"], region: KnownGenomicRegions
) -> "GenomicRegion":
    """Get known genomic region.

    The region string held by the enum member is parsed with ``from_string``.

    Args:
        region (KnownGenomicRegions): Known genomic region.

    Returns:
        GenomicRegion: Genomic region object.

    Examples:
        >>> print(GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC))
        6:25726063-33400556
    """
    # Dispatch through cls rather than the hard-coded base class so that a
    # subclass gets an instance of itself back.
    return cls.from_string(region.value)

from_string(region: str) -> GenomicRegion classmethod

Parse region string to chr:start-end.

Parameters:

Name Type Description Default
region str

Genomic region expected to follow chr##:#,###-#,### format or ##:####-#####.

required

Returns:

Name Type Description
GenomicRegion GenomicRegion

Genomic region object.

Raises:

Type Description
ValueError

If the region does not contain all three components (chromosome, start and end), or if the start or end position cannot be cast to an integer.

Examples:

>>> print(GenomicRegion.from_string('chr6:28,510,120-33,480,577'))
6:28510120-33480577
>>> print(GenomicRegion.from_string('6:28510120-33480577'))
6:28510120-33480577
>>> print(GenomicRegion.from_string('6:28510120'))
Traceback (most recent call last):
    ...
ValueError: Genomic region should follow a ##:####-#### format.
>>> print(GenomicRegion.from_string('6:28510120-foo'))
Traceback (most recent call last):
    ...
ValueError: Start and the end position of the region has to be integer.
Source code in src/gentropy/common/genomic_region.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
@classmethod
def from_string(cls: type["GenomicRegion"], region: str) -> "GenomicRegion":
    """Parse region string to chr:start-end.

    Thousands separators (``,``) are dropped and a single leading ``chr``
    prefix is stripped from the chromosome name.

    Args:
        region (str): Genomic region expected to follow chr##:#,###-#,### format or ##:####-#####.

    Returns:
        GenomicRegion: Genomic region object.

    Raises:
        ValueError: If the region does not split into exactly three components (chromosome, start, end), or if the start or end position cannot be cast to an integer.

    Examples:
        >>> print(GenomicRegion.from_string('chr6:28,510,120-33,480,577'))
        6:28510120-33480577
        >>> print(GenomicRegion.from_string('6:28510120-33480577'))
        6:28510120-33480577
        >>> print(GenomicRegion.from_string('6:28510120'))
        Traceback (most recent call last):
            ...
        ValueError: Genomic region should follow a ##:####-#### format.
        >>> print(GenomicRegion.from_string('6:28510120-foo'))
        Traceback (most recent call last):
            ...
        ValueError: Start and the end position of the region has to be integer.
    """
    # Treat ":" and "-" as the same separator and drop thousands commas, so a
    # well-formed region always splits into exactly three fields.
    fields = region.replace(",", "").replace(":", "-").split("-")
    if len(fields) != 3:
        raise ValueError("Genomic region should follow a ##:####-#### format.")
    chromosome, start_position, end_position = fields

    # Only the integer conversions are guarded, so the error message below
    # cannot mask an unrelated ValueError raised by the constructor.
    try:
        start = int(start_position)
        end = int(end_position)
    except ValueError as err:
        raise ValueError(
            "Start and the end position of the region has to be integer."
        ) from err

    # Strip "chr" only when it is a prefix; a blanket str.replace would also
    # remove any later "chr" inside the contig name.
    if chromosome.startswith("chr"):
        chromosome = chromosome[len("chr") :]

    return cls(chromosome=chromosome, start=start, end=end)