Target Index
gentropy.dataset.target_index.TargetIndex
dataclass
¶
Bases: Dataset
Target index dataset.
Gene-based annotation.
Source code in src/gentropy/dataset/target_index.py
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
|
filter_by_biotypes(biotypes: list[str]) -> TargetIndex
¶
Filter by approved biotypes.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
biotypes
|
list[str]
|
List of Ensembl biotypes to keep. |
required |
Returns:
Name | Type | Description |
---|---|---|
TargetIndex |
TargetIndex
|
Target index dataset filtered by biotypes. |
Source code in src/gentropy/dataset/target_index.py
33 34 35 36 37 38 39 40 41 42 43 |
|
get_schema() -> StructType
classmethod
¶
Provides the schema for the TargetIndex dataset.
Returns:
Name | Type | Description |
---|---|---|
StructType |
StructType
|
Schema for the TargetIndex dataset. |
Source code in src/gentropy/dataset/target_index.py
24 25 26 27 28 29 30 31 |
|
locations_lut() -> DataFrame
¶
Gene location information.
Returns:
Name | Type | Description |
---|---|---|
DataFrame |
DataFrame
|
Gene LUT including genomic location information. |
Source code in src/gentropy/dataset/target_index.py
45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
|
symbols_lut() -> DataFrame
¶
Gene symbol lookup table.
Pre-processes the gene/target dataset to create a lookup table of gene symbols, including obsolete gene symbols.
Returns:
Name | Type | Description |
---|---|---|
DataFrame |
DataFrame
|
Gene LUT for symbol mapping, containing gene identifiers and their gene symbols (including obsolete symbols). |
Source code in src/gentropy/dataset/target_index.py
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
|
Schema¶
root
|-- id: string (nullable = false)
|-- approvedSymbol: string (nullable = true)
|-- biotype: string (nullable = true)
|-- transcriptIds: array (nullable = true)
| |-- element: string (containsNull = true)
|-- canonicalTranscript: struct (nullable = true)
| |-- id: string (nullable = true)
| |-- chromosome: string (nullable = true)
| |-- start: long (nullable = true)
| |-- end: long (nullable = true)
| |-- strand: string (nullable = true)
|-- canonicalExons: array (nullable = true)
| |-- element: string (containsNull = true)
|-- genomicLocation: struct (nullable = true)
| |-- chromosome: string (nullable = true)
| |-- start: long (nullable = true)
| |-- end: long (nullable = true)
| |-- strand: integer (nullable = true)
|-- alternativeGenes: array (nullable = true)
| |-- element: string (containsNull = true)
|-- approvedName: string (nullable = true)
|-- go: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- id: string (nullable = true)
| | |-- source: string (nullable = true)
| | |-- evidence: string (nullable = true)
| | |-- aspect: string (nullable = true)
| | |-- geneProduct: string (nullable = true)
| | |-- ecoId: string (nullable = true)
|-- hallmarks: struct (nullable = true)
| |-- attributes: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- pmid: long (nullable = true)
| | | |-- description: string (nullable = true)
| | | |-- attribute_name: string (nullable = true)
| |-- cancerHallmarks: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- pmid: long (nullable = true)
| | | |-- description: string (nullable = true)
| | | |-- impact: string (nullable = true)
| | | |-- label: string (nullable = true)
|-- synonyms: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- label: string (nullable = true)
| | |-- source: string (nullable = true)
|-- symbolSynonyms: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- label: string (nullable = true)
| | |-- source: string (nullable = true)
|-- nameSynonyms: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- label: string (nullable = true)
| | |-- source: string (nullable = true)
|-- functionDescriptions: array (nullable = true)
| |-- element: string (containsNull = true)
|-- subcellularLocations: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- location: string (nullable = true)
| | |-- source: string (nullable = true)
| | |-- termSL: string (nullable = true)
| | |-- labelSL: string (nullable = true)
|-- targetClass: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- id: long (nullable = true)
| | |-- label: string (nullable = true)
| | |-- level: string (nullable = true)
|-- obsoleteSymbols: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- label: string (nullable = true)
| | |-- source: string (nullable = true)
|-- obsoleteNames: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- label: string (nullable = true)
| | |-- source: string (nullable = true)
|-- constraint: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- constraintType: string (nullable = true)
| | |-- score: float (nullable = true)
| | |-- exp: float (nullable = true)
| | |-- obs: integer (nullable = true)
| | |-- oe: float (nullable = true)
| | |-- oeLower: float (nullable = true)
| | |-- oeUpper: float (nullable = true)
| | |-- upperRank: integer (nullable = true)
| | |-- upperBin: integer (nullable = true)
| | |-- upperBin6: integer (nullable = true)
|-- tep: struct (nullable = true)
| |-- targetFromSourceId: string (nullable = true)
| |-- description: string (nullable = true)
| |-- therapeuticArea: string (nullable = true)
| |-- url: string (nullable = true)
|-- proteinIds: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- id: string (nullable = true)
| | |-- source: string (nullable = true)
|-- dbXrefs: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- id: string (nullable = true)
| | |-- source: string (nullable = true)
|-- chemicalProbes: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- control: string (nullable = true)
| | |-- drugId: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- isHighQuality: boolean (nullable = true)
| | |-- mechanismOfAction: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- origin: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- probeMinerScore: long (nullable = true)
| | |-- probesDrugsScore: long (nullable = true)
| | |-- scoreInCells: long (nullable = true)
| | |-- scoreInOrganisms: long (nullable = true)
| | |-- targetFromSourceId: string (nullable = true)
| | |-- urls: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- niceName: string (nullable = true)
| | | | |-- url: string (nullable = true)
|-- homologues: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- speciesId: string (nullable = true)
| | |-- speciesName: string (nullable = true)
| | |-- homologyType: string (nullable = true)
| | |-- targetGeneId: string (nullable = true)
| | |-- isHighConfidence: string (nullable = true)
| | |-- targetGeneSymbol: string (nullable = true)
| | |-- queryPercentageIdentity: double (nullable = true)
| | |-- targetPercentageIdentity: double (nullable = true)
| | |-- priority: integer (nullable = true)
|-- tractability: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- modality: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- value: boolean (nullable = true)
|-- safetyLiabilities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- event: string (nullable = true)
| | |-- eventId: string (nullable = true)
| | |-- effects: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- direction: string (nullable = true)
| | | | |-- dosing: string (nullable = true)
| | |-- biosamples: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- cellFormat: string (nullable = true)
| | | | |-- cellLabel: string (nullable = true)
| | | | |-- tissueId: string (nullable = true)
| | | | |-- tissueLabel: string (nullable = true)
| | |-- datasource: string (nullable = true)
| | |-- literature: string (nullable = true)
| | |-- url: string (nullable = true)
| | |-- studies: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- description: string (nullable = true)
| | | | |-- name: string (nullable = true)
| | | | |-- type: string (nullable = true)
|-- pathways: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- pathwayId: string (nullable = true)
| | |-- pathway: string (nullable = true)
| | |-- topLevelTerm: string (nullable = true)
|-- tss: long (nullable = true)