# Source code for skgenome.rangelabel

"""Handle text genomic ranges as named tuples.

A range specification should look like ``chromosome:start-end``, e.g.
``chr1:1234-5678``, with 1-indexed integer coordinates. We also allow
``chr1:1234-`` or ``chr1:-5678``, where a missing start or end becomes None.
"""

import collections
import re
from typing import Union
from collections.abc import Sequence

# Genomic interval as a 3-field tuple: chromosome name, start, end.
Region = collections.namedtuple("Region", "chromosome start end")
# Same interval with a 4th field, ``gene``, holding a label for the region.
NamedRegion = collections.namedtuple("NamedRegion", "chromosome start end gene")

# Pattern for labels shaped like "<chrom>:<start>-<end> <gene>".
# Only the ":" and "-" separators are mandatory; each of the four capture
# groups is optional and is None when absent:
#   1. chromosome -- a word character followed by word characters or dots
#   2. start      -- decimal digits
#   3. end        -- decimal digits
#   4. gene       -- first run of non-whitespace after optional whitespace
# The pattern is not anchored at the end, so text after group 4 is ignored
# when used with ``.match()``.
re_label = re.compile(r"(\w[\w.]*)?:(\d+)?-(\d+)?\s*(\S+)?")


def from_label(text: str, keep_gene: bool = True) -> Union[Region, NamedRegion]:
    """Parse a chromosomal range specification into a region tuple.

    The start coordinate written in the label is 1-indexed; it is returned
    shifted down by one. The end coordinate is returned as written.

    Parameters
    ----------
    text : string
        Range specification, which should look like ``chr1:1234-5678`` or
        ``chr1:1234-`` or ``chr1:-5678``. A missing start or a missing end is
        returned as None. A non-whitespace token following the range is
        taken as the gene name.
    keep_gene : bool
        If True, return a 4-field NamedRegion whose ``gene`` is the trailing
        token, or an empty string if there is none; otherwise return a
        3-field Region of chromosomal coordinates without gene labels.

    Returns
    -------
    Region or NamedRegion
        The parsed chromosome, start and end (plus gene if `keep_gene`).

    Raises
    ------
    ValueError
        If `text` does not match the expected label pattern.
    """
    parsed = re_label.match(text)
    if parsed is None:
        raise ValueError(
            f"Invalid range spec: {text} (should be like: chr1:2333000-2444000)"
        )

    chrom, start_text, end_text, gene = parsed.groups()

    # Each coordinate group is either None (absent) or a non-empty digit run.
    start = None
    if start_text:
        # Shift the 1-indexed label start down by one.
        start = int(start_text) - 1
    end = None
    if end_text:
        end = int(end_text)

    if not keep_gene:
        return Region(chrom, start, end)
    return NamedRegion(chrom, start, end, gene or "")
def to_label(row: Region) -> str:
    """Convert a Region tuple to a region label.

    The label has the form ``chromosome:start-end``. The stored start is
    written incremented by one, mirroring the ``- 1`` shift applied by
    `from_label`; the end is written unchanged.

    A start or end of None (an open-ended bound, as produced by `from_label`
    for labels like ``chr1:-5678`` or ``chr1:1234-``) is written as an empty
    field, so the resulting label can be parsed back by `from_label`.

    Parameters
    ----------
    row : Region
        Any object with ``chromosome``, ``start`` and ``end`` attributes.

    Returns
    -------
    str
        The region label, e.g. ``chr1:1234-5678``.
    """
    # Guard against None before the arithmetic: ``None + 1`` is a TypeError.
    start = "" if row.start is None else row.start + 1
    # Avoid emitting the literal text "None" for an open end.
    end = "" if row.end is None else row.end
    return f"{row.chromosome}:{start}-{end}"
def unpack_range(a_range: Union[str, Sequence]) -> Region:
    """Extract chromosome, start, end from a string or tuple.

    Accepted inputs:

    - A falsy value (None, empty string, empty sequence): every field of the
      result is None.
    - A string containing both ``:`` and ``-``: parsed as a range label via
      `from_label`, dropping any gene name.
    - Any other string: taken as a bare chromosome name.
    - A list or tuple of length 3 or 4: the first three items are used.

    Examples::

        "chr1" -> ("chr1", None, None)
        "chr1:100-123" -> ("chr1", 99, 123)
        ("chr1", 100, 123) -> ("chr1", 100, 123)

    Raises
    ------
    ValueError
        If `a_range` is none of the accepted forms above.
    """
    if not a_range:
        return Region(None, None, None)

    if isinstance(a_range, str):
        is_label = ":" in a_range and "-" in a_range
        if not is_label:
            # No coordinates present; the whole string is the chromosome.
            return Region(a_range, None, None)
        return from_label(a_range, keep_gene=False)  # type: ignore

    # A 4th item, if present, is discarded.
    if isinstance(a_range, (list, tuple)) and len(a_range) in (3, 4):
        return Region(*a_range[:3])

    raise ValueError(f"Not a range: {a_range!r}")