Use the Data in this Data Package

This data package contains code to generate various filtered forms of a normalised dataset, built by downloading from DEE2 the accessions listed in inst/hsapiens_colData_transitions_v3.5.csv for the species “hsapiens”. Because the data files were too large to fit inside the data package itself, it also provides a way to fetch, via AnnotationHub, the data that was pregenerated at the time of initial package development; this helps both reproducibility and ease of use.

In order to prepare for downloading the data, you will need the following setup:

library(DESeq2)
#> Loading required package: S4Vectors
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> 
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#> 
#>     findMatches
#> The following objects are masked from 'package:base':
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomicRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#> 
#>     rowMedians
#> The following objects are masked from 'package:matrixStats':
#> 
#>     anyMissing, rowMedians
library(S4Vectors)
library(Biobase)
library(SummarizedExperiment)
library(getDEE2)
library(devtools)
#> Loading required package: usethis
library(CellScore)
library(homosapienDEE2CellScore)

All of the permutations of filtering and normalising the data can be downloaded in a labeled list like so:

# Download every filtered/normalised variant of the dataset in one call; the
# result is a labelled list with one entry per variant.
the_data <- downloadAllTheData()
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache

We can then get the data we are after - non-normalised data including samples with quality control warnings - and use it in CellScore to calculate the on/off score for cell transitions from fibroblast to embryonic stem cells:

# Pick out the raw (non-normalised) dataset that retains samples flagged with
# quality-control warnings.
# NOTE(review): to avoid downloading everything, the single-dataset route is
# presumably
# `sm <- homosapienDEE2CellScore::readInSEZip(homosapienDEE2CellScore::HomosapienDEE2_QC_WARN_Raw())`
# (the earlier comment named the QC_PASS accessor, which does not match the
# dataset selected here) -- confirm the accessor name.
sm <- the_data$HomosapienDEE2_QC_WARN_Raw

# Split the samples by their `category` label, then bind the "test" and
# "standard" groups back together column-wise as the input for CellScore.
test1 <- sm[, sm$category == "test"]
standard <- sm[, sm$category == "standard"]
sm1 <- cbind(test1, standard)

# The single transition to score: fibroblast (FIB) start, nESC test samples,
# embryonic stem cell (ESC) target.
cell.change <- data.frame(
  start = "FIB",
  test = "nESC",
  target = "ESC"
)

# Calculate the on/off score for that transition.
group.OnOff <- OnOff(sm1, cell.change, out.put = "marker.list")
#> Warning in .calculateGroupOnOff(score.comparisons, calls, pdata, annot, :
#> Multiple array platforms exist in the phenotype data.