## ----style, echo = FALSE, results = 'hide'------------------------------------ BiocStyle::markdown() ## ----------------------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("CytoMDS") ## ----rlibs, results=FALSE----------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) library(CytoMDS) library(ggplot2) library(patchwork) ## ----loadDataSet-------------------------------------------------------------- BCRXL_fs <- HDCytoData::Bodenmiller_BCR_XL_flowSet() BCRXL_fs ## ----convertPhenoData--------------------------------------------------------- phenoData <- flowCore::pData(BCRXL_fs) additionalPhenoData <- keyword(BCRXL_fs[[1]], "EXPERIMENT_INFO")$EXPERIMENT_INFO phenoData <- cbind(phenoData, additionalPhenoData) flowCore::pData(BCRXL_fs) <- phenoData ## ----selectMarkers------------------------------------------------------------ markerInfo <- keyword(BCRXL_fs[[1]], "MARKER_INFO")$MARKER_INFO chClass <- markerInfo$marker_class table(chClass) chLabels <- markerInfo$channel_name[chClass != "none"] (chMarkers <- markerInfo$marker_name[chClass != "none"]) ## ----scaleTransform----------------------------------------------------------- trans <- arcsinhTransform( transformationId="ArcsinhTransform", a = 0, b = 1/5, c = 0) BCRXL_fs_trans <- transform( BCRXL_fs, transformList(chLabels, trans)) ## ----distCalc----------------------------------------------------------------- pwDist <- pairwiseEMDDist( BCRXL_fs_trans, channels = chMarkers, verbose = FALSE ) ## ----distShow----------------------------------------------------------------- round(pwDist[1:10, 1:10], 2) ## ----distShowHist, fig.align='center', fig.wide = TRUE------------------------ distVec <- pwDist[upper.tri(pwDist)] distVecDF <- data.frame(dist = distVec) pHist <- ggplot(distVecDF, mapping = aes(x=dist)) + geom_histogram(fill = "darkgrey", col = "black", bins = 15) + theme_bw() + ggtitle("EMD distances for Bodenmiller2012 dataset") pHist ## ----MDSCalc------------------------------------------------------------------ mdsObj <- computeMetricMDS(pwDist, seed = 0) show(mdsObj) ## ----plotMDS1_simplest, fig.align='center', fig.asp = 1, fig.wide = TRUE------ ggplotSampleMDS(mdsObj) ## ----plotMDS_colours_shapes_1_2, fig.align='center', fig.asp = 0.9, fig.wide = TRUE---- p12 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p12 ## ----plotMDS_colours_shapes_2_3_4, fig.align='center', fig.width = 9, fig.height = 12---- p23 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(2,3), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p34 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(3,4), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p23 / p34 ## ----plotMDSShepard, fig.align='center', fig.wide = TRUE, fig.asp = 1--------- ggplotSampleMDSShepard(mdsObj) ## ----MDSCalc.nDim2, fig.align='center', fig.asp = 0.85, fig.wide = TRUE------- mdsObj2 <- CytoMDS::computeMetricMDS(pwDist, seed = 0, nDim = 2) ggplotSampleMDS(mdsObj2, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id", flipYAxis = TRUE) ## ----MDSCalc.Rsq99, fig.align='center', fig.asp = 0.85, fig.wide = TRUE------- mdsObj3 <- CytoMDS::computeMetricMDS(pwDist, seed = 0, targetPseudoRSq = 0.99) ggplotSampleMDS(mdsObj3, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id") ## ----plotMDSShepard.Rsq99, fig.align='center', fig.wide = TRUE, fig.asp = 1---- ggplotSampleMDSShepard(mdsObj3) ## ----plotMDS_medians---------------------------------------------------------- medians <- channelSummaryStats(BCRXL_fs_trans, channels = chLabels, statFUNs = median) ## ----plotMDS_biplot, fig.align='center', fig.asp = 0.8, fig.wide = TRUE------- ggplotSampleMDS(mdsObj, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, biplot = TRUE, extVariables = medians) ## ----plotMDS_biplot_0.6, fig.align='center', fig.asp = 0.8, fig.wide = TRUE---- ggplotSampleMDS(mdsObj, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, biplot = TRUE, extVariables = medians, arrowThreshold = 0.6) ## ----plotMarginalDensities, fig.align='center', fig.asp = 1.2, fig.wide = TRUE---- ggplotMarginalDensities( BCRXL_fs_trans, channels = chLabels, pDataForColour = "group_id", pDataForGroup = "sample_id") ## ----plotMDS_stats------------------------------------------------------------ statFUNs = c("median" = stats::median, "Q25" = function(x, na.rm) { stats::quantile(x, probs = 0.25) }, "Q75" = function(x, na.rm) { stats::quantile(x, probs = 0.75) }, "standard deviation" = stats::sd) chStats <- channelSummaryStats(BCRXL_fs_trans, channels = chLabels, statFUNs = statFUNs) ## ----plotMDS_biplot_facetting, fig.align='center', fig.asp = 1, fig.wide = TRUE---- ggplotSampleMDSWrapBiplots( mdsObj, extVariableList = chStats, ncol = 2, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, displayLegend = FALSE) ## ----plotMDS_biplot_stddev, fig.align='center', fig.asp = 1.0, fig.wide = TRUE---- stdDevs <- list( "std dev of channels 1 to 6" = chStats[["standard deviation"]][,1:6], "std dev of channels 7 to 12" = chStats[["standard deviation"]][,7:12], "std dev of channels 13 to 18" = chStats[["standard deviation"]][,13:18], "std dev of channels 19 to 24" = chStats[["standard deviation"]][,19:24] ) ggplotSampleMDSWrapBiplots( mdsObj, ncol = 2, extVariableList = stdDevs, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE) ## ----temporary_store---------------------------------------------------------- storageLocation <- suppressMessages(base::tempdir()) nSample <- length(BCRXL_fs_trans) fileNames <- file.path( storageLocation, paste0("BodenMiller2012_TransformedSample", sprintf("%02d.rds", seq_len(nSample)))) for (i in seq_len(nSample)) { saveRDS(BCRXL_fs_trans[[i]], file = fileNames[i]) } ## ----distance_calc_loading_otf_parallel--------------------------------------- bp <- BiocParallel::SnowParam( stop.on.error = FALSE, progressbar = TRUE) pwDistLast <- suppressWarnings(pairwiseEMDDist( x = nSample, channels = chMarkers, loadExprMatrixFUN = function(exprMatrixIndex, files, channels, markers){ ff <- readRDS(file = files[exprMatrixIndex ]) exprMat <- flowCore::exprs(ff)[, channels, drop = FALSE] colnames(exprMat) <- markers exprMat }, loadExprMatrixFUNArgs = list( files = fileNames, channels = chLabels, markers = chMarkers), BPPARAM = bp)) ## ----dist1ShowHistAgain, fig.align='center', fig.wide = TRUE------------------ distVecLast <- pwDistLast[upper.tri(pwDistLast)] distVecDFLast <- data.frame(dist = distVecLast) pHistLast <- ggplot(distVecDFLast, mapping = aes(x=dist)) + geom_histogram(fill = "darkgrey", col = "black", bins = 15) + theme_bw() + ggtitle( "EMD distances for Bodenmiller2012 dataset", subtitle = "on the fly memory loading and parallel computation") pHistLast ## ----simulExprMatrices-------------------------------------------------------- nSample <- 10 conditions <- factor(c(rep("conditionA", 5), rep("conditionB", 5))) nRow <- 1000 nFeat <- 10 nDiffFeat <- 3 diffFeats = c(2, 3, 9) stdDevFactor = 1.5 exprMatrixList <- mapply( seq(nSample), conditions, FUN = function(i, condition) { exprMatrix <- matrix(rnorm(nRow*nFeat), nrow = nRow) if (condition == "conditionB") { exprMatrix[, diffFeats] <- exprMatrix[, diffFeats] * stdDevFactor } colnames(exprMatrix) <- paste0("Feat", seq(nFeat)) exprMatrix }, SIMPLIFY = FALSE ) names(exprMatrixList) <- paste0("sample", seq(nSample)) ## ----distExprMatrices--------------------------------------------------------- pwDistExpr <- pairwiseEMDDist( exprMatrixList ) ## ----distExprMatShowHist, fig.align='center', fig.wide = TRUE----------------- distVecExpr <- pwDistExpr[upper.tri(pwDistExpr)] distVecDFExpr <- data.frame(dist = distVecExpr) pHistExpr <- ggplot(distVecDFExpr, mapping = aes(x=dist)) + geom_histogram(fill = "darkgrey", col = "black", bins = 15) + theme_bw() + ggtitle( "EMD distances for simulated expression matrices") pHistExpr ## ----MDSCalcExprMat----------------------------------------------------------- mdsObjExpr <- computeMetricMDS(pwDistExpr, seed = 0) show(mdsObjExpr) ## ----plotMDSExprMat, fig.align='center', fig.asp = 0.9, fig.wide = TRUE------- phenoDataExpr <- data.frame(sampleId = seq(nSample), cond = conditions) p12 <- ggplotSampleMDS( mdsObjExpr, pData = phenoDataExpr, projectionAxes = c(1,2), pDataForColour = "cond", pDataForLabel = "sampleId" ) p12 ## ----biplotsMDSExprMat, fig.align='center', fig.asp = 0.9, fig.wide = TRUE---- statFunctions <- list(mean = base::mean, std_dev = stats::sd) chStatsExpr <- channelSummaryStats(exprMatrixList, statFUNs = statFunctions) p <- CytoMDS::ggplotSampleMDSWrapBiplots( mdsObj = mdsObjExpr, extVariableList = chStatsExpr, pData = phenoDataExpr, projectionAxes = c(1,2), pDataForColour = "cond", displayPointLabels = FALSE, repelArrowLabels = TRUE, ncol = 2, arrowThreshold = 0.9 ) p ## ----sessioninfo, echo=FALSE-------------------------------------------------- sessionInfo()