## ----setup, echo = FALSE------------------------------------------------------ knitr::opts_chunk$set(message = FALSE, crop = NULL) ## ----load-packages------------------------------------------------------------ library(VariantAnnotation) library(rtracklayer) library(extraChIPs) library(transmogR) library(BSgenome.Hsapiens.UCSC.hg38) ## ----chr1--------------------------------------------------------------------- chr1 <- getSeq(BSgenome.Hsapiens.UCSC.hg38, "chr1") chr1 <- as(chr1, "DNAStringSet") names(chr1) <- "chr1" chr1 ## ----var---------------------------------------------------------------------- sq <- seqinfo(chr1) genome(sq) <- "GRCh38" vcf <- system.file("extdata/1000GP_subset.vcf.gz", package = "transmogR") vcf_param <- ScanVcfParam(fixed = "ALT", info = NA, which = GRanges(sq)) var <- rowRanges(readVcf(vcf, param = vcf_param)) seqinfo(var) <- sq var ## ----gtf---------------------------------------------------------------------- f <- system.file("extdata/gencode.v44.subset.gtf.gz", package = "transmogR") gtf <- import.gff(f, which = GRanges(sq)) seqinfo(gtf) <- sq gtf ## ----gtf-split---------------------------------------------------------------- gtf <- splitAsList(gtf, gtf$type) ## ----upset-var, fig.cap = "Included variants which overlap exonic sequences, summarised by unique gene ids"---- upsetVarByCol(gtf$exon, var, mcol = "gene_id") ## ----overlaps-by-var---------------------------------------------------------- regions <- defineRegions(gtf$gene, gtf$transcript, gtf$exon, proximal = 0) regions$exon <- granges(gtf$exon) overlapsByVar(regions, var) ## ----chr1-mod----------------------------------------------------------------- chr1_mod <- genomogrify(chr1, var, tag = "mod") chr1_mod ## ----trans-mod---------------------------------------------------------------- trans_mod <- transmogrify( chr1, var, gtf$exon, tag = "mod", var_tags = TRUE ) trans_mod names(trans_mod)[grepl("_si", names(trans_mod))] ## ----par-y-------------------------------------------------------------------- sq_hg38 <- seqinfo(BSgenome.Hsapiens.UCSC.hg38) parY(sq_hg38) ## ----sj----------------------------------------------------------------------- ec <- c("transcript_id", "transcript_name", "gene_id", "gene_name") sj <- sjFromExons(gtf$exon, extra_cols = ec) sj ## ----chop-sj------------------------------------------------------------------ chopMC(sj) ## ----sj-as-gi----------------------------------------------------------------- sj <- sjFromExons(gtf$exon, extra_cols = ec, as = "GInteractions") subset(sj, transcript_name == "DDX11L17-201") ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()