Source code for skgenome.combiners

"""Combiner functions for Python list-like input."""

from typing import TYPE_CHECKING, Any, Optional
from collections.abc import Callable
from collections.abc import Iterable, Sequence

import pandas as pd

if TYPE_CHECKING:
    from pandas.core.frame import DataFrame


[docs] def get_combiners( table: pd.DataFrame, stranded: bool = False, combine: Optional[dict[str, Callable]] = None, ) -> dict[str, Callable]: """Get a `combine` lookup suitable for `table`. Parameters ---------- table : DataFrame stranded : bool combine : dict or None Column names to their value-combining functions, replacing or in addition to the defaults. Returns ------- dict: Column names to their value-combining functions. """ cmb = { "chromosome": first_of, "start": first_of, "end": max, "gene": join_strings, "accession": join_strings, "weight": sum, "probes": sum, } if combine: cmb.update(combine) if "strand" not in cmb: cmb["strand"] = first_of if stranded else merge_strands return {k: v for k, v in cmb.items() if k in table.columns} # type: ignore
[docs] def first_of(elems: Sequence) -> Any: """Return the first element of the input.""" return elems[0]
[docs] def last_of(elems: Sequence) -> Any: """Return the last element of the input.""" return elems[-1]
max_of = max
[docs] def join_strings(elems: Iterable, sep: str = ",") -> str: """Join a Series of strings by commas.""" # ENH if elements are also comma-separated, split+uniq those too return sep.join(pd.unique(elems))
[docs] def merge_strands(elems: Sequence) -> str: """Summarize the given strands as '+', '-', or '.' (both/mixed)""" strands = set(elems) if len(strands) > 1: return "." return elems[0]
[docs] def make_const(val: Any) -> Callable: """Return a function that simply returns the value given as input here.""" def const(_elems): return val return const