Load required packages
library(readr)
library(DESeq2)
library(EnhancedVolcano)
library(dplyr)
library(tibble)
library(gridExtra)
chrRNA_counts_path <- "/path/to/folder/with/iXistChrX_YY1-FKBP_chrRNA/data/"
data_path <- "/path/to/folder/with/chrRNA/data/"
aF1_chrRNA_ES_d0_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_un.sortn.count"), skip = 1)
aF1_chrRNA_ES_2dDox_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_un.sortn.count"), skip = 1)
aF1_chrRNA_ES_6dDox_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_un.sortn.count"), skip = 1)
aF1_chrRNA_ES_d0_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_dTag52h.sortn.count"), skip = 1)
aF1_chrRNA_ES_2dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_dTag52h.sortn.count"), skip = 1)
aF1_chrRNA_ES_6dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_dTag52h.sortn.count"), skip = 1)
cC3_chrRNA_ES_d0_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_un.sortn.count"), skip = 1)
cC3_chrRNA_ES_2dDox_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_un.sortn.count"), skip = 1)
cC3_chrRNA_ES_6dDox_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_un.sortn.count"), skip = 1)
cC3_chrRNA_ES_d0_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_dTag52h.sortn.count"), skip = 1)
cC3_chrRNA_ES_2dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_dTag52h.sortn.count"), skip = 1)
cC3_chrRNA_ES_6dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_dTag52h.sortn.count"), skip = 1)
aF1 (iXist-ChrX-Dom YY1-FKBP12-3xT7)
#make cts
cts_aF1 <- cbind(aF1_chrRNA_ES_d0_un[7], aF1_chrRNA_ES_2dDox_un[7], aF1_chrRNA_ES_6dDox_un[7],
aF1_chrRNA_ES_d0_dTag52h[7], aF1_chrRNA_ES_2dDox_dTag52h[7], aF1_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(aF1_chrRNA_ES_d0_un[,1])
rownames(cts_aF1) <- genelist$Geneid
sample <- c("aF1_d0_un", "aF1_d2_un","aF1_d6_un",
"aF1_d0_dTag", "aF1_d2_dTag","aF1_d6_dTag")
colnames(cts_aF1) <- sample
dTAG <- c(rep("un", times=3), rep("dTAG", times=3))
coldata <- cbind(sample, dTAG)
#make DESeq data set
dds_aF1 <- DESeqDataSetFromMatrix(countData = cts_aF1,
colData = coldata,
design= ~ dTAG)
#Set untreated "un" as the reference factor
dds_aF1$dTAG <- relevel(dds_aF1$dTAG, ref = "un")
dds_aF1 <- DESeq(dds_aF1)
res <- results(dds_aF1)
plotMA(res, ylim=c(-3,3))
#perform shrinkage
resLFC_aF1 <- lfcShrink(dds_aF1, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC_aF1, ylim=c(-3,3))
aF1_plotMA <- plotMA(resLFC_aF1, ylim=c(-3,3), returnData = TRUE)
cC3 (iXist-ChrX-Cast YY1-FKBP12-3xT7)
#make cts
cts_cC3 <- cbind(cC3_chrRNA_ES_d0_un[7], cC3_chrRNA_ES_2dDox_un[7], cC3_chrRNA_ES_6dDox_un[7],
cC3_chrRNA_ES_d0_dTag52h[7], cC3_chrRNA_ES_2dDox_dTag52h[7], cC3_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(cC3_chrRNA_ES_d0_un[,1])
rownames(cts_cC3) <- genelist$Geneid
sample <- c("cC3_d0_un", "cC3_d2_un","cC3_d6_un",
"cC3_d0_dTag", "cC3_d2_dTag","cC3_d6_dTag")
colnames(cts_cC3) <- sample
#make DESeq data set
dds_cC3 <- DESeqDataSetFromMatrix(countData = cts_cC3,
colData = coldata,
design= ~ dTAG)
#Set untreated "un" as the reference factor
dds_cC3$dTAG <- relevel(dds_cC3$dTAG, ref = "un")
dds_cC3 <- DESeq(dds_cC3)
res <- results(dds_cC3)
plotMA(res, ylim=c(-3,3))
#perform shrinkage
resLFC_cC3 <- lfcShrink(dds_cC3, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC_cC3, ylim=c(-3,3))
cC3_plotMA <- plotMA(resLFC_cC3, ylim=c(-3,3), returnData = TRUE)
Are the same gene differentially regulated in aF1 and cC3?
aF1_down <- aF1_plotMA[aF1_plotMA$isDE == TRUE & aF1_plotMA$lfc < 0,]
cC3_down <- cC3_plotMA[cC3_plotMA$isDE == TRUE & cC3_plotMA$lfc < 0,]
both_down <- sum(rownames(cC3_down) %in% rownames(aF1_down) == TRUE)
paste0(both_down, " of ", nrow(cC3_down), " downregulated genes in cC3 are also downregulated in aF1 (",100*(both_down/nrow(cC3_down)),"%)")
## [1] "916 of 1219 downregulated genes in cC3 are also downregulated in aF1 (75.143560295324%)"
aF1_up <- aF1_plotMA[aF1_plotMA$isDE == TRUE & aF1_plotMA$lfc > 0,]
cC3_up <- cC3_plotMA[cC3_plotMA$isDE == TRUE & cC3_plotMA$lfc > 0,]
both_up <- sum(rownames(cC3_up) %in% rownames(aF1_up) == TRUE)
paste0(both_up, " of ", nrow(cC3_up), " upregulated genes in cC3 are also upregulated in aF1 (",100*(both_up/nrow(cC3_up)),"%)")
## [1] "537 of 863 upregulated genes in cC3 are also upregulated in aF1 (62.2247972190035%)"
Volcano plots for aF1 and cC3 lines separately
# plot only signficant genes on volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5
#for cC3
resLFC_cC3_tb <- resLFC_cC3 %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
sig_resLFC_cC3 <- resLFC_cC3_tb %>%
filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
signif_up_cC3 <- subset(sig_resLFC_cC3, log2FoldChange > 0)
signif_down_cC3 <- subset(sig_resLFC_cC3, log2FoldChange < 0)
keyvals.colour_cC3 <- ifelse(
rownames(resLFC_cC3) %in% signif_up_cC3$gene, 'orange',
ifelse(rownames(resLFC_cC3) %in% signif_down_cC3$gene, "blue",
'black'))
keyvals.colour_cC3[is.na(keyvals.colour_cC3)] <- 'black'
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'orange'] <- paste("up n=",nrow(signif_up_cC3))
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'blue'] <- paste("up n=",nrow(signif_down_cC3))
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'black'] <- 'non-sig'
volcano_cC3 <- EnhancedVolcano(resLFC_cC3,
title = 'cC3 - dTAG vs untreated',
lab = NA,
xlim = c(-4,3),
ylim = c(0,50),
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 10e-4,
FCcutoff = 0.58,
colCustom = keyvals.colour_cC3,
gridlines.minor = FALSE)
volcano_cC3
#for aF1
resLFC_aF1_tb <- resLFC_aF1 %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
sig_resLFC_aF1 <- resLFC_aF1_tb %>%
filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
signif_up_aF1 <- subset(sig_resLFC_aF1, log2FoldChange > 0)
signif_down_aF1 <- subset(sig_resLFC_aF1, log2FoldChange < 0)
keyvals.colour_aF1 <- ifelse(
rownames(resLFC_aF1) %in% signif_up_aF1$gene, 'orange',
ifelse(rownames(resLFC_aF1) %in% signif_down_aF1$gene, "blue",
'black'))
keyvals.colour_aF1[is.na(keyvals.colour_aF1)] <- 'black'
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'orange'] <- paste("up n=",nrow(signif_up_aF1))
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'blue'] <- paste("up n=",nrow(signif_down_aF1))
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'black'] <- 'non-sig'
volcano_aF1 <- EnhancedVolcano(resLFC_aF1,
title = 'aF1 - dTAG vs untreated',
lab = NA,
xlim = c(-4,3),
ylim = c(0,50),
pCutoff = 10e-4,
FCcutoff = 0.58,
x = 'log2FoldChange',
y = 'pvalue',
colCustom = keyvals.colour_aF1,
gridlines.minor = FALSE)
volcano_aF1
#make cts
cts <- cbind(aF1_chrRNA_ES_d0_un[7], aF1_chrRNA_ES_2dDox_un[7], aF1_chrRNA_ES_6dDox_un[7],
cC3_chrRNA_ES_d0_un[7], cC3_chrRNA_ES_2dDox_un[7], cC3_chrRNA_ES_6dDox_un[7],
aF1_chrRNA_ES_d0_dTag52h[7], aF1_chrRNA_ES_2dDox_dTag52h[7], aF1_chrRNA_ES_6dDox_dTag52h[7],
cC3_chrRNA_ES_d0_dTag52h[7], cC3_chrRNA_ES_2dDox_dTag52h[7], cC3_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(aF1_chrRNA_ES_d0_un[,1])
rownames(cts) <- genelist$Geneid
sample <- c("aF1_d0_un", "aF1_d2_un","aF1_d6_un",
"cC3_d0_un", "cC3_d2_un","cC3_d6_un",
"aF1_d0_dTag", "aF1_d2_dTag","aF1_d6_dTag",
"cC3_d0_dTag", "cC3_d2_dTag","cC3_d6_dTag")
colnames(cts) <- sample
dTAG <- c(rep("un", times=6), rep("dTAG", times=6))
coldata <- cbind(sample, dTAG)
#make DESeq data set
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design= ~ dTAG)
#Set untreated "un" as the reference factor
dds$dTAG <- relevel(dds$dTAG, ref = "un")
dds <- DESeq(dds)
res <- results(dds)
plotMA(res, ylim=c(-3,3))
#perform shrinkage
resLFC <- lfcShrink(dds, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC, ylim=c(-3,3))
Make volcano plot all samples together
#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5
resLFC_tb <- resLFC %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
sig_resLFC <- resLFC_tb %>%
filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
signif_up <- subset(sig_resLFC, log2FoldChange > 0)
signif_down <- subset(sig_resLFC, log2FoldChange < 0)
keyvals.colour <- ifelse(
rownames(resLFC) %in% signif_up$gene, 'orange',
ifelse(rownames(resLFC) %in% signif_down$gene, "blue",
'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'
volcano_6samples <- EnhancedVolcano(resLFC,
title = 'both lines: dTAG vs untreated',
lab = NA,
xlim = c(-4,3),
ylim = c(0,50),
x = 'log2FoldChange',
y = 'pvalue',
colCustom = keyvals.colour,
pCutoff = 10e-4,
FCcutoff = 0.58,
gridlines.minor = FALSE)
volcano_6samples
All samples, but only plot YY1 targets
GeneTSS_YY1prom <- read.delim(paste0(data_path,"GeneTSS_YY1prom.bed"), header=FALSE)
YY1targets <- rownames(resLFC[rownames(resLFC) %in% GeneTSS_YY1prom$V4,])
resLFC_YY1 <- resLFC[rownames(resLFC[rownames(resLFC) %in% GeneTSS_YY1prom$V4,]),]
#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5
resLFC_YY1_tb <- resLFC_YY1 %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
sig_resLFC_YY1 <- resLFC_YY1_tb %>%
filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
signif_up_YY1 <- subset(sig_resLFC_YY1, log2FoldChange > 0)
signif_down_YY1 <- subset(sig_resLFC_YY1, log2FoldChange < 0)
keyvals.colour <- ifelse(
rownames(resLFC_YY1) %in% signif_up_YY1$gene, 'orange',
ifelse(rownames(resLFC_YY1) %in% signif_down_YY1$gene, "blue",
'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up_YY1))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down_YY1))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'
volcano_6samples_YY1 <- EnhancedVolcano(resLFC_YY1,
title = 'both lines: dTAG vs untreated',
lab = NA,
xlim = c(-4,3),
ylim = c(0,50),
x = 'log2FoldChange',
y = 'pvalue',
colCustom = keyvals.colour,
pCutoff = 10e-4,
FCcutoff = 0.58,
gridlines.minor = FALSE)
#plot whole-genome and YY1 targets
volcano_6samples_YY1
All samples, but split by ChrX vs Autosomes
xlinked_genes <- resLFC[rownames(resLFC) %in% aF1_chrRNA_ES_d0_un[(aF1_chrRNA_ES_d0_un$Chr %in% "chrX"),]$Geneid,]
xlinked_gene_names <- rownames(xlinked_genes)
resLFC_chrX <- resLFC[rownames(resLFC[rownames(resLFC) %in% xlinked_gene_names,]),]
#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5
resLFC_chrX_tb <- resLFC_chrX %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
sig_resLFC_chrX <- resLFC_chrX_tb %>%
filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)
signif_up_chrX <- subset(sig_resLFC_chrX, log2FoldChange > 0)
signif_down_chrX <- subset(sig_resLFC_chrX, log2FoldChange < 0)
keyvals.colour <- ifelse(
rownames(resLFC_chrX) %in% signif_up_chrX$gene, 'orange',
ifelse(rownames(resLFC_chrX) %in% signif_down_chrX$gene, "blue",
'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up_chrX))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down_chrX))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'
volcano_6samples_chrX <- EnhancedVolcano(resLFC_chrX,
title = 'both lines: dTAG vs untreated',
lab = rownames(resLFC_chrX),
xlim = c(-4,3),
ylim = c(0,50),
x = 'log2FoldChange',
y = 'pvalue',
colCustom = keyvals.colour,
pCutoff = 10e-4,
FCcutoff = 0.58,
gridlines.minor = FALSE)
volcano_6samples_chrX