Source code for cnvlib.cmdutil

"""Functions reused within command-line implementations."""
from __future__ import absolute_import, division, print_function

import logging
import sys

from skgenome import tabio

from .cnary import CopyNumArray as CNA


[docs]def read_cna(infile, sample_id=None, meta=None): """Read a CNVkit file (.cnn, .cnr, .cns) to create a CopyNumArray object.""" return tabio.read(infile, into=CNA, sample_id=sample_id, meta=meta)
[docs]def load_het_snps(vcf_fname, sample_id=None, normal_id=None, min_variant_depth=20, zygosity_freq=None, tumor_boost=False): if vcf_fname is None: return None varr = tabio.read(vcf_fname, 'vcf', sample_id=sample_id, normal_id=normal_id, min_depth=min_variant_depth, skip_somatic=True) if (zygosity_freq is None and 'n_zygosity' in varr and not varr['n_zygosity'].any()): # Mutect2 sets all normal genotypes to 0/0 -- work around it logging.warning("VCF normal sample's genotypes are all 0/0 or missing; " "inferring genotypes from allele frequency instead") zygosity_freq = 0.25 if zygosity_freq is not None: varr = varr.zygosity_from_freq(zygosity_freq, 1 - zygosity_freq) if 'n_zygosity' in varr: # Infer & drop (more) somatic loci based on genotype somatic_idx = (varr['zygosity'] != 0.0) & (varr['n_zygosity'] == 0.0) if somatic_idx.any() and not somatic_idx.all(): logging.info("Skipping %d additional somatic records based on " "T/N genotypes", somatic_idx.sum()) varr = varr[~somatic_idx] orig_len = len(varr) varr = varr.heterozygous() logging.info("Kept %d heterozygous of %d VCF records", len(varr), orig_len) # TODO use/explore tumor_boost option if tumor_boost: varr['alt_freq'] = varr.tumor_boost() return varr
[docs]def verify_sample_sex(cnarr, sex_arg, is_male_reference): is_sample_female = cnarr.guess_xx(is_male_reference, verbose=False) if sex_arg: is_sample_female_given = (sex_arg.lower() not in ['y', 'm', 'male']) if is_sample_female != is_sample_female_given: logging.warning("Sample sex specified as %s " "but chromosomal X/Y ploidy looks like %s", "female" if is_sample_female_given else "male", "female" if is_sample_female else "male") is_sample_female = is_sample_female_given logging.info("Treating sample %s as %s", cnarr.sample_id or '', "female" if is_sample_female else "male") return is_sample_female
[docs]def write_tsv(outfname, rows, colnames=None): """Write rows, with optional column header, to tabular file.""" with tabio.safe_write(outfname or sys.stdout) as handle: if colnames: header = '\t'.join(colnames) + '\n' handle.write(header) handle.writelines('\t'.join(map(str, row)) + '\n' for row in rows)
[docs]def write_text(outfname, text, *more_texts): """Write one or more strings (blocks of text) to a file.""" with tabio.safe_write(outfname or sys.stdout) as handle: handle.write(text) if more_texts: for mtext in more_texts: handle.write(mtext)
[docs]def write_dataframe(outfname, dframe, header=True): """Write a pandas.DataFrame to a tabular file.""" with tabio.safe_write(outfname or sys.stdout) as handle: dframe.to_csv(handle, header=header, index=False, sep='\t', float_format='%.6g')